Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2019-02-19 12:00:00 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.28833 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Tue Feb 19 12:00:00 2019 rev:96 rq:676860 version:2019.02.18 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2019-02-13 10:10:11.901487845 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/python-youtube-dl.changes 2019-02-19 12:00:22.353139284 +0100 @@ -1,0 +2,25 @@ +Sun Feb 17 22:01:42 UTC 2019 - Sebastien CHAVAUX <[email protected]> + +- Update to new upstream release 2019.02.18 + * [tvp:website] Fix and improve extraction + * [tvp] Detect unavailable videos + * [tvp] Fix description extraction and make thumbnail optional + * [linuxacademy] Add support for linuxacademy.com (#12207) + * [bilibili] Update keys (#19233) + * [udemy] Extend URL regular expressions (#14330, #15883) + * [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) + * [noovo] Fix extraction (#19230) + * [rai] Relax URL regular expression (#19232) + * [vshare] Pass Referer to download request (#19205, #19221) + * [openload] Add support for oload.live (#19222) + * [imgur] Use video id as title fallback (#18590) + * [twitch] Add new source format detection approach (#19193) + * [tvplayhome] Fix video id extraction (#19190) + * [tvplayhome] Fix episode metadata extraction (#19190) + * [rutube:embed] Fix extraction (#19163) + * [rutube:embed] Add support private videos (#19163) + * [soundcloud] Extract more metadata + * [trunews] Add support for trunews.com (#19153) + * [linkedin:learning] Extract chapter_number and chapter_id (#19162) + +------------------------------------------------------------------- --- /work/SRC/openSUSE:Factory/youtube-dl/youtube-dl.changes 2019-02-13 10:10:14.081487286 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/youtube-dl.changes 2019-02-19 12:00:22.633139137 +0100 @@ -1,0 +2,25 @@ +Sun Feb 17 21:50:30 UTC 2019 - Sebastien CHAVAUX <[email protected]> + +- Update to new upstream release 2019.02.18 + * [tvp:website] Fix and improve extraction + * [tvp] Detect unavailable videos + * [tvp] Fix description extraction and make thumbnail optional + * [linuxacademy] Add support for linuxacademy.com (#12207) + * [bilibili] Update keys (#19233) + * [udemy] Extend URL regular expressions (#14330, #15883) + * [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) + * [noovo] Fix extraction (#19230) + * [rai] Relax URL regular expression (#19232) + * [vshare] Pass Referer to download request (#19205, #19221) + * [openload] Add support for oload.live (#19222) + * [imgur] Use video id as title fallback (#18590) + * [twitch] Add new source format detection approach (#19193) + * [tvplayhome] Fix video id extraction (#19190) + * [tvplayhome] Fix episode metadata extraction (#19190) + * [rutube:embed] Fix extraction (#19163) + * [rutube:embed] Add support private videos (#19163) + * [soundcloud] Extract more metadata + * [trunews] Add support for trunews.com (#19153) + * [linkedin:learning] Extract chapter_number and chapter_id (#19162) + +------------------------------------------------------------------- Old: ---- youtube-dl-2019.02.08.tar.gz youtube-dl-2019.02.08.tar.gz.sig New: ---- youtube-dl-2019.02.18.tar.gz youtube-dl-2019.02.18.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.U6Smuu/_old 2019-02-19 12:00:26.953136869 +0100 +++ /var/tmp/diff_new_pack.U6Smuu/_new 2019-02-19 12:00:26.957136867 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2019.02.08 +Version: 2019.02.18 Release: 0 Summary: A python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.U6Smuu/_old 2019-02-19 12:00:26.981136854 +0100 +++ /var/tmp/diff_new_pack.U6Smuu/_new 2019-02-19 12:00:26.981136854 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2019.02.08 +Version: 2019.02.18 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2019.02.08.tar.gz -> youtube-dl-2019.02.18.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2019-02-07 19:12:48.000000000 +0100 +++ new/youtube-dl/ChangeLog 2019-02-17 20:11:07.000000000 +0100 @@ -1,3 +1,28 @@ +version 2019.02.18 + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* [imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + version 2019.02.08 Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md --- old/youtube-dl/docs/supportedsites.md 2019-02-07 19:12:51.000000000 +0100 +++ new/youtube-dl/docs/supportedsites.md 2019-02-17 20:11:11.000000000 +0100 @@ -458,6 +458,7 @@ - **LineTV** - **linkedin:learning** - **linkedin:learning:course** + - **LinuxAcademy** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -915,6 +916,7 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **TruNews** - **TruTV** - **Tube8** - **TubiTv** Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/bilibili.py new/youtube-dl/youtube_dl/extractor/bilibili.py --- old/youtube-dl/youtube_dl/extractor/bilibili.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/bilibili.py 2019-02-16 20:40:21.000000000 +0100 @@ -93,8 +93,8 @@ }] }] - _APP_KEY = '84956560bc028eb7' - _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def _report_error(self, result): if 'message' in result: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/extractors.py new/youtube-dl/youtube_dl/extractor/extractors.py --- old/youtube-dl/youtube_dl/extractor/extractors.py 2019-01-30 00:22:58.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/extractors.py 2019-02-16 20:40:21.000000000 +0100 @@ -593,6 +593,7 @@ LinkedInLearningIE, LinkedInLearningCourseIE, ) +from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE from .liveleak import ( LiveLeakIE, @@ -1171,6 +1172,7 @@ from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trunews import TruNewsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE @@ -1216,7 +1218,7 @@ from .tvp import ( TVPEmbedIE, TVPIE, - TVPSeriesIE, + TVPWebsiteIE, ) from .tvplay import ( TVPlayIE, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/imgur.py new/youtube-dl/youtube_dl/extractor/imgur.py --- old/youtube-dl/youtube_dl/extractor/imgur.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/imgur.py 2019-02-16 20:40:21.000000000 +0100 @@ -27,6 +27,10 @@ }, { 'url': 'https://i.imgur.com/crGpqCV.mp4', 'only_matching': True, + }, { + # no title + 'url': 'https://i.imgur.com/jxBXAMC.gifv', + 'only_matching': True, }] def _real_extract(self, url): @@ -87,7 +91,7 @@ return { 'id': video_id, 'formats': formats, - 'title': self._og_search_title(webpage), + 'title': self._og_search_title(webpage, default=video_id), } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/linkedin.py new/youtube-dl/youtube_dl/extractor/linkedin.py --- old/youtube-dl/youtube_dl/extractor/linkedin.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/linkedin.py 2019-02-16 20:40:21.000000000 +0100 @@ -34,12 +34,15 @@ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, }, query=query)['elements'][0] - def _get_video_id(self, urn, course_slug, video_slug): + def _get_urn_id(self, video_data): + urn = video_data.get('urn') if urn: mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) if mobj: return mobj.group(1) - return '%s/%s' % (course_slug, video_slug) + + def _get_video_id(self, video_data, course_slug, video_slug): + return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): email, password = self._get_login_info() @@ -123,7 +126,7 @@ self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) return { - 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video_data, course_slug, video_slug), 'title': title, 'formats': formats, 'thumbnail': video_data.get('defaultThumbnail'), @@ -154,18 +157,21 @@ course_data = self._call_api(course_slug, 'chapters,description,title') entries = [] - for chapter in course_data.get('chapters', []): + for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1): chapter_title = chapter.get('title') + chapter_id = self._get_urn_id(chapter) for video in chapter.get('videos', []): video_slug = video.get('slug') if not video_slug: continue entries.append({ '_type': 'url_transparent', - 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video, course_slug, video_slug), 'title': video.get('title'), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), 'chapter': chapter_title, + 'chapter_number': chapter_number, + 'chapter_id': chapter_id, 'ie_key': LinkedInLearningIE.ie_key(), }) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/linuxacademy.py new/youtube-dl/youtube_dl/extractor/linuxacademy.py --- old/youtube-dl/youtube_dl/extractor/linuxacademy.py 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/linuxacademy.py 2019-02-16 20:40:21.000000000 +0100 @@ -0,0 +1,174 @@ +from __future__ import unicode_literals + +import json +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + orderedSet, + unescapeHTML, + urlencode_postdata, + urljoin, +) + + +class LinuxAcademyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?linuxacademy\.com/cp/ + (?: + courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)| + modules/view/id/(?P<course_id>\d+) + ) + ''' + _TESTS = [{ + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', + 'info_dict': { + 'id': '1498-2', + 'ext': 'mp4', + 'title': "Introduction to the Practitioner's Brief", + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Requires Linux Academy account credentials', + }, { + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', + 'only_matching': True, + }, { + 'url': 'https://linuxacademy.com/cp/modules/view/id/154', + 'info_dict': { + 'id': '154', + 'title': 'AWS Certified Cloud Practitioner', + 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', + }, + 'playlist_count': 41, + 'skip': 'Requires Linux Academy account credentials', + }] + + _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' + _ORIGIN_URL = 'https://linuxacademy.com' + _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' + _NETRC_MACHINE = 'linuxacademy' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + def random_string(): + return ''.join([ + random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') + for _ in range(32)]) + + webpage, urlh = self._download_webpage_handle( + self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ + 'client_id': self._CLIENT_ID, + 'response_type': 'token id_token', + 'redirect_uri': self._ORIGIN_URL, + 'scope': 'openid email user_impersonation profile', + 'audience': self._ORIGIN_URL, + 'state': random_string(), + 'nonce': random_string(), + }) + + login_data = self._parse_json( + self._search_regex( + r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, + 'login info', group='value'), None, + transform_source=lambda x: compat_b64decode(x).decode('utf-8') + )['extraParams'] + + login_data.update({ + 'client_id': self._CLIENT_ID, + 'redirect_uri': self._ORIGIN_URL, + 'tenant': 'lacausers', + 'connection': 'Username-Password-Authentication', + 'username': username, + 'password': password, + 'sso': 'true', + }) + + login_state_url = compat_str(urlh.geturl()) + + try: + login_page = self._download_webpage( + 'https://login.linuxacademy.com/usernamepassword/login', None, + 'Downloading login page', data=json.dumps(login_data).encode(), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read(), None) + message = error.get('description') or error['code'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) + raise + + callback_page, urlh = self._download_webpage_handle( + 'https://login.linuxacademy.com/login/callback', None, + 'Downloading callback page', + data=urlencode_postdata(self._hidden_inputs(login_page)), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + + access_token = self._search_regex( + r'access_token=([^=&]+)', compat_str(urlh.geturl()), + 'access token') + + self._download_webpage( + 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' + % access_token, None, 'Downloading token validation page') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') + item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) + + webpage = self._download_webpage(url, item_id) + + # course path + if course_id: + entries = [ + self.url_result( + urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) + for lesson_url in orderedSet(re.findall( + r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', + webpage))] + title = unescapeHTML(self._html_search_regex( + (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)', + r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'title', default=None, group='value')) + description = unescapeHTML(self._html_search_regex( + r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'description', default=None, group='value')) + return self.playlist_result(entries, course_id, title, description) + + # single video path + info = self._extract_jwplayer_data( + webpage, item_id, require_title=False, m3u8_id='hls',) + title = self._search_regex( + (r'>Lecture\s*:\s*(?P<value>[^<]+)', + r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'title', group='value') + info.update({ + 'id': item_id, + 'title': title, + }) + return info diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/noovo.py new/youtube-dl/youtube_dl/extractor/noovo.py --- old/youtube-dl/youtube_dl/extractor/noovo.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/noovo.py 2019-02-16 20:40:21.000000000 +0100 @@ -57,7 +57,8 @@ webpage = self._download_webpage(url, video_id) - bc_url = BrightcoveNewIE._extract_url(self, webpage) + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') data = self._parse_json( self._search_regex( @@ -89,7 +90,10 @@ return { '_type': 'url_transparent', 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, 'title': title, 'description': description, 'series': series, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/openload.py new/youtube-dl/youtube_dl/extractor/openload.py --- old/youtube-dl/youtube_dl/extractor/openload.py 2019-01-30 00:22:58.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/openload.py 2019-02-16 20:40:21.000000000 +0100 @@ -249,7 +249,7 @@ (?:www\.)? (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) ) )/ (?:f|embed)/ @@ -346,6 +346,9 @@ }, { 'url': 'https://oload.pw/f/WyKgK8s94N0', 'only_matching': True, + }, { + 'url': 'https://oload.live/f/-Z58UZ-GR4M', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rai.py new/youtube-dl/youtube_dl/extractor/rai.py --- old/youtube-dl/youtube_dl/extractor/rai.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/rai.py 2019-02-16 20:40:21.000000000 +0100 @@ -288,7 +288,7 @@ class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ # var uniquename = "ContentItem-..." # data-id="ContentItem-..." @@ -375,6 +375,9 @@ # Direct MMS URL 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, }] def _extract_from_content_id(self, content_id, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rutube.py new/youtube-dl/youtube_dl/extractor/rutube.py --- old/youtube-dl/youtube_dl/extractor/rutube.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/rutube.py 2019-02-16 20:40:21.000000000 +0100 @@ -21,7 +21,17 @@ class RutubeBaseIE(InfoExtractor): - def _extract_video(self, video, video_id=None, require_title=True): + def _download_api_info(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/video/%s/' % video_id, + video_id, 'Downloading video JSON', + 'Unable to download video JSON', query=query) + + @staticmethod + def _extract_info(video, video_id=None, require_title=True): title = video['title'] if require_title else video.get('title') age_limit = video.get('is_adult') @@ -32,7 +42,7 @@ category = try_get(video, lambda x: x['category']['name']) return { - 'id': video.get('id') or video_id, + 'id': video.get('id') or video_id if video_id else video['id'], 'title': title, 'description': video.get('description'), 'thumbnail': video.get('thumbnail_url'), @@ -47,6 +57,42 @@ 'is_live': bool_or_none(video.get('is_livestream')), } + def _download_and_extract_info(self, video_id, query=None): + return self._extract_info( + self._download_api_info(video_id, query=query), video_id) + + def _download_api_options(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/play/options/%s/' % video_id, + video_id, 'Downloading options JSON', + 'Unable to download options JSON', + headers=self.geo_verification_headers(), query=query) + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['video_balancer'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + return formats + + def _download_and_extract_formats(self, video_id, query=None): + return self._extract_formats( + self._download_api_options(video_id, query=query), video_id) + class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' @@ -55,13 +101,13 @@ _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': '79938ade01294ef7e27574890d0d3769', + 'md5': '1d24f180fac7a02f3900712e5a5764d6', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', - 'duration': 80, + 'duration': 81, 'uploader': 'NTDRussian', 'uploader_id': '29790', 'timestamp': 1381943602, @@ -94,39 +140,12 @@ def _real_extract(self, url): video_id = self._match_id(url) - - video = self._download_json( - 'http://rutube.ru/api/video/%s/?format=json' % video_id, - video_id, 'Downloading video JSON') - - info = self._extract_video(video, video_id) - - options = self._download_json( - 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON', - headers=self.geo_verification_headers()) - - formats = [] - for format_id, format_url in options['video_balancer'].items(): - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - info['formats'] = formats + info = self._download_and_extract_info(video_id) + info['formats'] = self._download_and_extract_formats(video_id) return info -class RutubeEmbedIE(InfoExtractor): +class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' @@ -135,7 +154,7 @@ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'flv', + 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', @@ -149,16 +168,26 @@ }, { 'url': 'http://rutube.ru/play/embed/8083783', 'only_matching': True, + }, { + # private video + 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', + 'only_matching': True, }] def _real_extract(self, url): embed_id = self._match_id(url) - webpage = self._download_webpage(url, embed_id) - - canonical_url = self._html_search_regex( - r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, - 'Canonical URL') - return self.url_result(canonical_url, RutubeIE.ie_key()) + # Query may contain private videos token and should be passed to API + # requests (see #19163) + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + options = self._download_api_options(embed_id, query) + video_id = options['effective_video'] + formats = self._extract_formats(options, video_id) + info = self._download_and_extract_info(video_id, query) + info.update({ + 'extractor_key': 'Rutube', + 'formats': formats, + }) + return info class RutubePlaylistBaseIE(RutubeBaseIE): @@ -181,7 +210,7 @@ video_url = url_or_none(result.get('video_url')) if not video_url: continue - entry = self._extract_video(result, require_title=False) + entry = self._extract_info(result, require_title=False) entry.update({ '_type': 'url', 'url': video_url, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/soundcloud.py new/youtube-dl/youtube_dl/extractor/soundcloud.py --- old/youtube-dl/youtube_dl/extractor/soundcloud.py 2019-01-30 00:22:58.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/soundcloud.py 2019-02-16 20:40:21.000000000 +0100 @@ -16,7 +16,8 @@ from ..utils import ( ExtractorError, int_or_none, - unified_strdate, + try_get, + unified_timestamp, update_url_query, url_or_none, ) @@ -51,12 +52,17 @@ 'info_dict': { 'id': '62986583', 'ext': 'mp3', - 'upload_date': '20121011', + 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. ExTerrestrial Music', - 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', + 'timestamp': 1349920598, + 'upload_date': '20121011', 'duration': 143, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, } }, # not streamable song @@ -68,9 +74,14 @@ 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', + 'timestamp': 1337635207, 'upload_date': '20120521', - 'duration': 227, + 'duration': 30, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { # rtmp @@ -85,11 +96,16 @@ 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # private link (alt format) @@ -100,11 +116,16 @@ 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # downloadable song @@ -117,9 +138,14 @@ 'title': 'Bus Brakes', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'uploader': 'oddsamples', + 'timestamp': 1389232924, 'upload_date': '20140109', 'duration': 17, 'license': 'cc-by-sa', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # private link, downloadable format @@ -132,9 +158,14 @@ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music', + 'timestamp': 1504206263, 'upload_date': '20170831', 'duration': 7449, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # no album art, use avatar pic for thumbnail @@ -147,10 +178,15 @@ 'title': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee', + 'timestamp': 1488152409, 'upload_date': '20170226', 'duration': 207, 'thumbnail': r're:https?://.*\.jpg', 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { 'skip_download': True, @@ -176,22 +212,33 @@ def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): track_id = compat_str(info['id']) + title = info['title'] name = full_title or track_id if quiet: self.report_extraction(name) thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') + username = try_get(info, lambda x: x['user']['username'], compat_str) + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + result = { 'id': track_id, - 'uploader': info.get('user', {}).get('username'), - 'upload_date': unified_strdate(info.get('created_at')), - 'title': info['title'], + 'uploader': username, + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), 'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': extract_count('favoritings'), + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), } formats = [] query = {'client_id': self._CLIENT_ID} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/trunews.py new/youtube-dl/youtube_dl/extractor/trunews.py --- old/youtube-dl/youtube_dl/extractor/trunews.py 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/trunews.py 2019-02-16 20:40:21.000000000 +0100 @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, + int_or_none, + unified_timestamp, + update_url_query, + url_or_none, +) + + +class TruNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'md5': 'a19c024c3906ff954fac9b96ce66bb08', + 'info_dict': { + 'id': '5c5a21e65d3c196e1c0020cc', + 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'ext': 'mp4', + 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?", + 'description': 'md5:c583b72147cc92cf21f56a31aff7a670', + 'duration': 3685, + 'timestamp': 1549411440, + 'upload_date': '20190206', + }, + 'add_ie': ['Zype'], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + video = self._download_json( + 'https://api.zype.com/videos', display_id, query={ + 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H', + 'per_page': 1, + 'active': 'true', + 'friendly_title': display_id, + })['response'][0] + + zype_id = video['_id'] + + thumbnails = [] + thumbnails_list = video.get('thumbnails') + if isinstance(thumbnails_list, list): + for thumbnail in thumbnails_list: + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + '_type': 'url_transparent', + 'url': update_url_query( + 'https://player.zype.com/embed/%s.js' % zype_id, + {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}), + 'ie_key': 'Zype', + 'id': zype_id, + 'display_id': display_id, + 'title': video.get('title'), + 'description': dict_get(video, ('description', 'ott_description', 'short_description')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('published_at')), + 'average_rating': float_or_none(video.get('rating')), + 'view_count': int_or_none(video.get('request_count')), + 'thumbnails': thumbnails, + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tvp.py new/youtube-dl/youtube_dl/extractor/tvp.py --- old/youtube-dl/youtube_dl/extractor/tvp.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tvp.py 2019-02-16 20:40:21.000000000 +0100 @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor from ..utils import ( - determine_ext, clean_html, - get_element_by_attribute, + determine_ext, ExtractorError, + get_element_by_attribute, + orderedSet, ) @@ -19,12 +21,12 @@ _TESTS = [{ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', - 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', 'info_dict': { 'id': '194536', 'ext': 'mp4', - 'title': 'Czas honoru, I seria – odc. 13', - 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', + 'title': 'Czas honoru, odc. 13 – Władek', + 'description': 'md5:437f48b93558370b031740546b696e24', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -45,6 +47,7 @@ 'title': 'Wiadomości, 28.09.2017, 19:30', 'description': 'Wydanie główne codziennego serwisu informacyjnego.' }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 'only_matching': True, @@ -75,8 +78,10 @@ return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -87,6 +92,15 @@ _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)' _TESTS = [{ + 'url': 'tvp:194536', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', + 'info_dict': { + 'id': '194536', + 'ext': 'mp4', + 'title': 'Czas honoru, odc. 13 – Władek', + }, + }, { + # not available 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { @@ -94,6 +108,7 @@ 'ext': 'mp4', 'title': 'Panorama, 07.12.2015, 15:40', }, + 'skip': 'Transmisja została zakończona lub materiał niedostępny', }, { 'url': 'tvp:22670268', 'only_matching': True, @@ -105,10 +120,13 @@ webpage = self._download_webpage( 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) - error_massage = get_element_by_attribute('class', 'msg error', webpage) - if error_massage: + error = self._html_search_regex( + r'(?s)<p[^>]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)</p>', + webpage, 'error', default=None) or clean_html( + get_element_by_attribute('class', 'msg error', webpage)) + if error: raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error_massage)), expected=True) + self.IE_NAME, clean_html(error)), expected=True) title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', @@ -180,48 +198,55 @@ } -class TVPSeriesIE(InfoExtractor): +class TVPWebsiteIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', + # series + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { - 'title': 'Ogniem i mieczem', - 'id': '4278026', + 'id': '38678312', }, - 'playlist_count': 4, + 'playlist_count': 115, }, { - 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', + # film + 'url': 'https://vod.tvp.pl/website/gloria,35139666', 'info_dict': { - 'title': 'Boso przez świat', - 'id': '9329207', + 'id': '36637049', + 'ext': 'mp4', + 'title': 'Gloria, Gloria', }, - 'playlist_count': 86, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['TVPEmbed'], + }, { + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312', + 'only_matching': True, }] - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id, tries=5) - - title = self._html_search_regex( - r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series') - playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id') - playlist = self._download_webpage( - 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend' - 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5, - note='Downloading playlist') - - videos_paths = re.findall( - '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) - entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) - for v_path in videos_paths] + def _entries(self, display_id, playlist_id): + url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id) + for page_num in itertools.count(1): + page = self._download_webpage( + url, display_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + + video_ids = orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id, + page)) + + if not video_ids: + break + + for video_id in video_ids: + yield self.url_result( + 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(), + video_id=video_id) - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': display_id, - 'title': title, - 'entries': entries, - } + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id, playlist_id = mobj.group('display_id', 'id') + return self.playlist_result( + self._entries(display_id, playlist_id), playlist_id) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tvplay.py new/youtube-dl/youtube_dl/extractor/tvplay.py --- old/youtube-dl/youtube_dl/extractor/tvplay.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tvplay.py 2019-02-16 20:40:21.000000000 +0100 @@ -493,10 +493,9 @@ webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id', - default=None) + r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id') - if video_id: + if len(video_id) < 8: return self.url_result( 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id) @@ -537,8 +536,9 @@ r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number', default=None)) episode = self._search_regex( - r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode', - default=None, group='value') + (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'episode', default=None, group='value') episode_number = int_or_none(self._search_regex( r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number', default=None)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/twitch.py new/youtube-dl/youtube_dl/extractor/twitch.py --- old/youtube-dl/youtube_dl/extractor/twitch.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/twitch.py 2019-02-16 20:40:21.000000000 +0100 @@ -136,7 +136,12 @@ source = next(f for f in formats if f['format_id'] == 'Source') source['preference'] = 10 except StopIteration: - pass # No Source stream present + for f in formats: + if '/chunked/' in f['url']: + f.update({ + 'source_preference': 10, + 'format_note': 'Source', + }) self._sort_formats(formats) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/udemy.py new/youtube-dl/youtube_dl/extractor/udemy.py --- old/youtube-dl/youtube_dl/extractor/udemy.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/udemy.py 2019-02-16 20:40:21.000000000 +0100 @@ -29,7 +29,7 @@ IE_NAME = 'udemy' _VALID_URL = r'''(?x) https?:// - www\.udemy\.com/ + (?:[^/]+\.)?udemy\.com/ (?: [^#]+\#/lecture/| lecture/view/?\?lectureId=| @@ -64,6 +64,9 @@ # only outputs rendition 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0', 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757', + 'only_matching': True, }] def _extract_course_info(self, webpage, video_id): @@ -123,10 +126,22 @@ def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers - return super(UdemyIE, self)._download_webpage_handle( + ret = super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) + if not ret: + return ret + webpage, _ = ret + if any(p in webpage for p in ( + '>Please verify you are a human', + 'Access to this page has been denied because we believe you are using automation tools to browse the website', + '"_pxCaptcha"')): + raise ExtractorError( + 'Udemy asks you to solve a CAPTCHA. Login with browser, ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies.', expected=True) + return ret def _download_json(self, url_or_request, *args, **kwargs): headers = { @@ -403,8 +418,14 @@ class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)' - _TESTS = [] + _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.udemy.com/java-tutorial/', + 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/', + 'only_matching': True, + }] @classmethod def suitable(cls, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vshare.py new/youtube-dl/youtube_dl/extractor/vshare.py --- old/youtube-dl/youtube_dl/extractor/vshare.py 2019-01-30 00:22:52.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/vshare.py 2019-02-16 20:40:21.000000000 +0100 @@ -48,7 +48,7 @@ webpage = self._download_webpage( 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, - video_id) + video_id, headers={'Referer': url}) title = self._html_search_regex( r'<title>([^<]+)</title>', webpage, 'title') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2019-02-07 19:12:48.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2019-02-17 20:11:07.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.08' +__version__ = '2019.02.18'
