Hello community,

here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2019-01-08 12:29:51
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old)
 and      /work/SRC/openSUSE:Factory/.youtube-dl.new.28833 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Tue Jan 8 12:29:51 2019 rev:91 rq:663450 version:2019.01.02 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2018-12-11 15:49:16.766110627 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/python-youtube-dl.changes 2019-01-08 12:31:43.000060527 +0100 @@ -1,0 +2,26 @@ +Mon Jan 7 21:50:04 UTC 2019 - Jan Engelhardt <[email protected]> + +- Update to new upstream release 2019.01.02 + * Add support for classic.ardmediathek.de, + learning.oreilly.com, embed.acast.com, Scripps Networks watch + domains, lecturio.de, npostart.nl, play.acast.com, + subscription.packtpub.com. + +------------------------------------------------------------------- +Sat Dec 22 15:34:11 UTC 2018 - [email protected] + +- Update to new upstream releease 2018.12.17 + * ard: Improve geo restricted videos extraction + * ard: Fix subtitles extraction + * ard: Improve extraction robustness + * ard: Relax URL regular expression + * acast: Add support for embed.acast.com/play.acast.com + * iprima: Relax URL regular expression + * vrv: Fix initial state extraction + * youtube: Fix mark watched + * safari: Add support for learning.oreilly.com + * youtube: Fix multifeed extraction + * lecturio: Improve subtitles extraction + * uol: Fix format URL extraction + +------------------------------------------------------------------- --- /work/SRC/openSUSE:Factory/youtube-dl/youtube-dl.changes 2018-12-24 11:48:19.933084298 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/youtube-dl.changes 2019-01-08 12:31:43.292060209 +0100 @@ -0,0 +1,10 @@ +------------------------------------------------------------------- +Mon Jan 7 21:50:04 UTC 2019 - Jan Engelhardt <[email protected]> + +- Update to new upstream release 2019.01.02 + * Add support for classic.ardmediathek.de, + learning.oreilly.com, embed.acast.com, Scripps Networks watch + domains, lecturio.de, npostart.nl, play.acast.com, + subscription.packtpub.com. + +------------------------------------------------------------------- Old: ---- youtube-dl-2018.12.17.tar.gz youtube-dl-2018.12.17.tar.gz.sig New: ---- youtube-dl-2019.01.02.tar.gz youtube-dl-2019.01.02.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.PJT9ut/_old 2019-01-08 12:31:43.864059588 +0100 +++ /var/tmp/diff_new_pack.PJT9ut/_new 2019-01-08 12:31:43.872059579 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-youtube-dl # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2018.12.17 +Version: 2019.01.02 Release: 0 Summary: A python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.PJT9ut/_old 2019-01-08 12:31:43.896059553 +0100 +++ /var/tmp/diff_new_pack.PJT9ut/_new 2019-01-08 12:31:43.908059540 +0100 @@ -1,7 +1,7 @@ # # spec file for package youtube-dl # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. 
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,7 +17,7 @@
 Name:           youtube-dl
-Version:        2018.12.17
+Version:        2019.01.02
 Release:        0
 Summary:        A tool for downloading from video sites for offline watching
 License:        SUSE-Public-Domain AND CC-BY-SA-3.0

++++++ youtube-dl-2018.12.17.tar.gz -> youtube-dl-2019.01.02.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog
--- old/youtube-dl/ChangeLog    2018-12-16 23:37:46.000000000 +0100
+++ new/youtube-dl/ChangeLog    2019-01-02 17:52:51.000000000 +0100
@@ -1,3 +1,38 @@
+version 2019.01.02
+
+Extractors
+* [discovery] Use geo verification headers (#17838)
++ [packtpub] Add support for subscription.packtpub.com (#18718)
+* [yourporn] Fix extraction (#18583)
++ [acast:channel] Add support for play.acast.com (#18587)
++ [extractors] Add missing age limits (#18621)
++ [rmcdecouverte] Add support for live stream
+* [rmcdecouverte] Bypass geo restriction
+* [rmcdecouverte] Update URL regular expression (#18595, 18697)
+* [manyvids] Fix extraction (#18604, #18614)
+* [bitchute] Fix extraction (#18567)
+
+
+version 2018.12.31
+
+Extractors
++ [bbc] Add support for another embed pattern (#18643)
++ [npo:live] Add support for npostart.nl (#18644)
+* [beeg] Fix extraction (#18610, #18626)
+* [youtube] Unescape HTML for series (#18641)
++ [youtube] Extract more format metadata
+* [youtube] Detect DRM protected videos (#1774)
+* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
+* [youtube] Extend HTML5 player regular expression (#17516)
++ [liveleak] Add support for another embed type and restore original
+  format extraction
++ [crackle] Extract ISM and HTTP formats
++ [twitter] Pass Referer with card request (#18579)
+* [mediasite] Extend URL regular expression (#18558)
++ [lecturio] Add support for lecturio.de (#18562)
++ [discovery] Add support for Scripps Networks watch domains (#17947)
+
+
 version 2018.12.17
 
 Extractors
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/README.md new/youtube-dl/README.md
--- old/youtube-dl/README.md    2018-12-16 23:37:49.000000000 +0100
+++ new/youtube-dl/README.md    2019-01-02 17:52:54.000000000 +0100
@@ -1025,15 +1025,19 @@
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+        $ flake8 youtube_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
         $ git commit -m '[yourextractor] Add new extractor'
         $ git push origin yourextractor
 
-10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
 
 In any case, thank you very much for your contributions!
@@ -1129,11 +1133,33 @@
 
 This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
 
-### Make regular expressions flexible
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
+
+##### Example
+
+Don't capture id attribute name here since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
-When using regular expressions try to write them fuzzy and flexible.
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
 
-#### Example
+##### Example
 
 Say you need to extract `title` from the following HTML code:
@@ -1166,6 +1192,25 @@
         webpage, 'title', group='title')
 ```
 
+### Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
 ### Use safe conversion functions
 
 Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
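[An aside for readers of the conventions above: `int_or_none` and `float_or_none` live in youtube_dl/utils.py. A minimal sketch of the pattern they implement — illustrative only, the upstream helpers handle more cases such as scaling and string stripping:

    def int_or_none(v, default=None):
        # Coerce to int, or return default when v is missing/malformed.
        try:
            return int(v)
        except (TypeError, ValueError):
            return default

    # Typical extractor usage: metadata may be absent or garbage.
    duration = int_or_none('1274')  # -> 1274
    views = int_or_none(None)       # -> None rather than a TypeError
]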
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/README.txt new/youtube-dl/README.txt
--- old/youtube-dl/README.txt   2018-12-16 23:38:25.000000000 +0100
+++ new/youtube-dl/README.txt   2019-01-02 17:53:26.000000000 +0100
@@ -1419,18 +1419,21 @@
     methods and a detailed description of what your extractor should and
     may return. Add tests and code for as many as you want.
 8.  Make sure your code follows youtube-dl coding conventions and check
-    the code with flake8. Also make sure your code works under all
-    Python versions claimed supported by youtube-dl, namely 2.6, 2.7,
-    and 3.2+.
-9.  When the tests pass, add the new files and commit them and push the
+    the code with flake8:
+
+        $ flake8 youtube_dl/extractor/yourextractor.py
+
+9.  Make sure your code works under all Python versions claimed
+    supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, add the new files and commit them and push the
     result, like this:
 
-        $ git add youtube_dl/extractor/extractors.py
-        $ git add youtube_dl/extractor/yourextractor.py
-        $ git commit -m '[yourextractor] Add new extractor'
-        $ git push origin yourextractor
+         $ git add youtube_dl/extractor/extractors.py
+         $ git add youtube_dl/extractor/yourextractor.py
+         $ git commit -m '[yourextractor] Add new extractor'
+         $ git push origin yourextractor
 
-10. Finally, create a pull request. We'll then review and merge it.
+11. Finally, create a pull request. We'll then review and merge it.
 
 In any case, thank you very much for your contributions!
@@ -1559,9 +1562,31 @@
 This code will try to extract from meta first and if it fails it will
 try extracting og:title from a webpage.
 
-Make regular expressions flexible
+Regular expressions
+
+Don't capture groups you don't use
 
-When using regular expressions try to write them fuzzy and flexible.
+Capturing group must be an indication that it's used somewhere in the
+code. Any group that is not used must be non capturing.
+
+Example
+
+Don't capture id attribute name here since you can't use it for anything
+anyway.
+
+Correct:
+
+    r'(?:id|ID)=(?P<id>\d+)'
+
+Incorrect:
+
+    r'(id|ID)=(?P<id>\d+)'
+
+Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and
+flexible, skipping insignificant parts that are more likely to change,
+allowing both single and double quotes for quoted values and so on.
 
 Example
 
@@ -1589,6 +1614,24 @@
         r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
         webpage, 'title', group='title')
 
+Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long.
+This means it should be respected if possible and if it does not make
+readability and code maintenance worse.
+
+For example, you should NEVER split long string literals like URLs or
+some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+    'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+
+Incorrect:
+
+    'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+    'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+
 Use safe conversion functions
 
 Wrap all extracted numeric data into safe functions from
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md
--- old/youtube-dl/docs/supportedsites.md       2018-12-16 23:37:50.000000000 +0100
+++ new/youtube-dl/docs/supportedsites.md       2019-01-02 17:52:54.000000000 +0100
@@ -438,6 +438,7 @@
  - **Lecture2Go**
  - **Lecturio**
  - **LecturioCourse**
+ - **LecturioDeCourse**
  - **LEGO**
  - **Lemonde**
  - **Lenta**
Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ
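[One concrete reason behind the "don't capture groups you don't use" rule documented in the README sections above, beyond signalling intent (an illustration, not part of the patch): with re.findall, introducing a capturing group changes what is returned.

    import re

    text = 'id=10 ID=20'
    # Non-capturing alternation: findall returns only the digits group.
    print(re.findall(r'(?:id|ID)=(\d+)', text))  # ['10', '20']
    # A stray capturing group makes findall return tuples instead.
    print(re.findall(r'(id|ID)=(\d+)', text))    # [('id', '10'), ('ID', '20')]
]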
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube-dl.1 new/youtube-dl/youtube-dl.1
--- old/youtube-dl/youtube-dl.1 2018-12-16 23:38:26.000000000 +0100
+++ new/youtube-dl/youtube-dl.1 2019-01-02 17:53:27.000000000 +0100
@@ -2095,11 +2095,20 @@
 Add tests and code for as many as you want.
 .IP " 8." 4
 Make sure your code follows youtube\-dl coding conventions and check the
-code with flake8 (https://pypi.python.org/pypi/flake8).
-Also make sure your code works under all
-Python (https://www.python.org/) versions claimed supported by
-youtube\-dl, namely 2.6, 2.7, and 3.2+.
+code with
+flake8 (http://flake8.pycqa.org/en/latest/index.html#quickstart):
+.RS 4
+.IP
+.nf
+\f[C]
+\ $\ flake8\ youtube_dl/extractor/yourextractor.py
+\f[]
+.fi
+.RE
 .IP " 9." 4
+Make sure your code works under all Python (https://www.python.org/)
+versions claimed supported by youtube\-dl, namely 2.6, 2.7, and 3.2+.
+.IP "10." 4
 When the tests pass, add (https://git-scm.com/docs/git-add) the new
 files and commit (https://git-scm.com/docs/git-commit) them and
 push (https://git-scm.com/docs/git-push) the result, like this:
@@ -2107,14 +2116,14 @@
 .IP
 .nf
 \f[C]
-\ $\ git\ add\ youtube_dl/extractor/extractors.py
-\ $\ git\ add\ youtube_dl/extractor/yourextractor.py
-\ $\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
-\ $\ git\ push\ origin\ yourextractor
+$\ git\ add\ youtube_dl/extractor/extractors.py
+$\ git\ add\ youtube_dl/extractor/yourextractor.py
+$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
+$\ git\ push\ origin\ yourextractor
 \f[]
 .fi
 .RE
-.IP "10." 4
+.IP "11." 4
 Finally, create a pull
 request (https://help.github.com/articles/creating-a-pull-request).
 We\[aq]ll then review and merge it.
@@ -2286,9 +2295,37 @@
 .PP
 This code will try to extract from \f[C]meta\f[] first and if it fails
 it will try extracting \f[C]og:title\f[] from a \f[C]webpage\f[].
-.SS Make regular expressions flexible
+.SS Regular expressions
+.SS Don\[aq]t capture groups you don\[aq]t use
+.PP
+Capturing group must be an indication that it\[aq]s used somewhere in
+the code.
+Any group that is not used must be non capturing.
+.SS Example
+.PP
+Don\[aq]t capture id attribute name here since you can\[aq]t use it for
+anything anyway.
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+r\[aq](?:id|ID)=(?P<id>\\d+)\[aq]
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+r\[aq](id|ID)=(?P<id>\\d+)\[aq]
+\f[]
+.fi
+.SS Make regular expressions relaxed and flexible
 .PP
-When using regular expressions try to write them fuzzy and flexible.
+When using regular expressions try to write them fuzzy, relaxed and
+flexible, skipping insignificant parts that are more likely to change,
+allowing both single and double quotes for quoted values and so on.
 .SS Example
 .PP
 Say you need to extract \f[C]title\f[] from the following HTML code:
@@ -2331,6 +2368,32 @@
 \ \ \ \ webpage,\ \[aq]title\[aq],\ group=\[aq]title\[aq])
 \f[]
 .fi
+.SS Long lines policy
+.PP
+There is a soft limit to keep lines of code under 80 characters long.
+This means it should be respected if possible and if it does not make
+readability and code maintenance worse.
+.PP
+For example, you should \f[B]never\f[] split long string literals like
+URLs or some other often copied entities over multiple lines to fit this
+limit:
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+\[aq]https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+\[aq]https://www.youtube.com/watch?v=FqZTN594JQw&list=\[aq]
+\[aq]PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
+\f[]
+.fi
 .SS Use safe conversion functions
 .PP
 Wrap all extracted numeric data into safe functions from
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/acast.py new/youtube-dl/youtube_dl/extractor/acast.py
--- old/youtube-dl/youtube_dl/extractor/acast.py        2018-12-16 23:36:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/acast.py        2019-01-02 17:52:15.000000000 +0100
@@ -79,17 +79,27 @@
 
 class ACastChannelIE(InfoExtractor):
     IE_NAME = 'acast:channel'
-    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
-    _TEST = {
-        'url': 'https://www.acast.com/condenasttraveler',
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?acast\.com/|
+                            play\.acast\.com/s/
+                        )
+                        (?P<id>[^/#?]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://www.acast.com/todayinfocus',
         'info_dict': {
-            'id': '50544219-29bb-499e-a083-6087f4cb7797',
-            'title': 'Condé Nast Traveler Podcast',
-            'description': 'md5:98646dee22a5b386626ae31866638fbd',
+            'id': '4efc5294-5385-4847-98bd-519799ce5786',
+            'title': 'Today in Focus',
+            'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
         },
-        'playlist_mincount': 20,
-    }
-    _API_BASE_URL = 'https://www.acast.com/api/'
+        'playlist_mincount': 35,
+    }, {
+        'url': 'http://play.acast.com/s/ft-banking-weekly',
+        'only_matching': True,
+    }]
+    _API_BASE_URL = 'https://play.acast.com/api/'
     _PAGE_SIZE = 10
 
     @classmethod
@@ -102,7 +112,7 @@
                 channel_slug, note='Download page %d of channel data' % page)
             for cast in casts:
                 yield self.url_result(
-                    'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
+                    'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
                     'ACast', cast['id'])
 
     def _real_extract(self, url):
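[The relaxed ACastChannel pattern above can be sanity-checked in isolation against the two test URLs from the hunk (standalone snippet, standard library only):

    import re

    ACAST_CHANNEL_RE = r'''(?x)
        https?://
        (?:
            (?:www\.)?acast\.com/|
            play\.acast\.com/s/
        )
        (?P<id>[^/#?]+)
    '''

    for url in ('https://www.acast.com/todayinfocus',
                'http://play.acast.com/s/ft-banking-weekly'):
        print(re.match(ACAST_CHANNEL_RE, url).group('id'))
    # -> todayinfocus, then ft-banking-weekly
]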
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/audiomack.py new/youtube-dl/youtube_dl/extractor/audiomack.py
--- old/youtube-dl/youtube_dl/extractor/audiomack.py    2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/audiomack.py    2019-01-02 17:52:15.000000000 +0100
@@ -62,7 +62,7 @@
             # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
             # if so, pass the work off to the soundcloud extractor
             if SoundcloudIE.suitable(api_response['url']):
-                return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
+                return self.url_result(api_response['url'], SoundcloudIE.ie_key())
 
             return {
                 'id': compat_str(api_response.get('id', album_url_tag)),
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/bbc.py new/youtube-dl/youtube_dl/extractor/bbc.py
--- old/youtube-dl/youtube_dl/extractor/bbc.py  2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/bbc.py  2019-01-02 17:52:02.000000000 +0100
@@ -795,6 +795,15 @@
             'uploader': 'Radio 3',
             'uploader_id': 'bbc_radio_three',
         },
+    }, {
+        'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+        'info_dict': {
+            'id': 'p06w9tws',
+            'ext': 'mp4',
+            'title': 'md5:2fabf12a726603193a2879a055f72514',
+            'description': 'Learn English words and phrases from this story',
+        },
+        'add_ie': [BBCCoUkIE.ie_key()],
     }]
 
     @classmethod
@@ -945,6 +954,15 @@
         if entries:
            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 
+        # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+        group_id = self._search_regex(
+            r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+            webpage, 'group id', default=None)
+        if group_id:
+            return self.url_result(
+                'https://www.bbc.co.uk/programmes/%s' % group_id,
+                ie=BBCCoUkIE.ie_key())
+
         # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
         programme_id = self._search_regex(
             [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
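[A recurring refactor in this release (audiomack above; cnn, freespeech, generic, livestream, savefrom, ted, testurl and wimp below) replaces hand-built result dicts with self.url_result(). The helper merely standardizes the '_type': 'url' dict that tells the core to re-dispatch the URL to another extractor; roughly — an approximation of InfoExtractor.url_result() in youtube_dl/extractor/common.py, not the verbatim code:

    def url_result(url, ie=None, video_id=None, video_title=None):
        # Standard "hand this URL to (possibly another) extractor" result.
        info = {'_type': 'url', 'url': url}
        if ie is not None:
            info['ie_key'] = ie
        if video_id is not None:
            info['id'] = video_id
        if video_title is not None:
            info['title'] = video_title
        return info
]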
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/beeg.py new/youtube-dl/youtube_dl/extractor/beeg.py
--- old/youtube-dl/youtube_dl/extractor/beeg.py 2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/beeg.py 2019-01-02 17:52:02.000000000 +0100
@@ -1,15 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_chr,
-    compat_ord,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
-    parse_iso8601,
-    urljoin,
+    unified_timestamp,
 )
 
 
@@ -36,29 +31,9 @@
 
         webpage = self._download_webpage(url, video_id)
 
-        cpl_url = self._search_regex(
-            r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
-            webpage, 'cpl', default=None, group='url')
-
-        cpl_url = urljoin(url, cpl_url)
-
-        beeg_version, beeg_salt = [None] * 2
-
-        if cpl_url:
-            cpl = self._download_webpage(
-                self._proto_relative_url(cpl_url), video_id,
-                'Downloading cpl JS', fatal=False)
-            if cpl:
-                beeg_version = int_or_none(self._search_regex(
-                    r'beeg_version\s*=\s*([^\b]+)', cpl,
-                    'beeg version', default=None)) or self._search_regex(
-                    r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
-                beeg_salt = self._search_regex(
-                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
-                    default=None, group='beeg_salt')
-
-        beeg_version = beeg_version or '2185'
-        beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
+        beeg_version = self._search_regex(
+            r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
+            default='1546225636701')
 
         for api_path in ('', 'api.'):
             video = self._download_json(
@@ -68,37 +43,6 @@
             if video:
                 break
 
-        def split(o, e):
-            def cut(s, x):
-                n.append(s[:x])
-                return s[x:]
-            n = []
-            r = len(o) % e
-            if r > 0:
-                o = cut(o, r)
-            while len(o) > e:
-                o = cut(o, e)
-            n.append(o)
-            return n
-
-        def decrypt_key(key):
-            # Reverse engineered from http://static.beeg.com/cpl/1738.js
-            a = beeg_salt
-            e = compat_urllib_parse_unquote(key)
-            o = ''.join([
-                compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
-                for n in range(len(e))])
-            return ''.join(split(o, 3)[::-1])
-
-        def decrypt_url(encrypted_url):
-            encrypted_url = self._proto_relative_url(
-                encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
-            key = self._search_regex(
-                r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
-            if not key:
-                return encrypted_url
-            return encrypted_url.replace(key, decrypt_key(key))
-
         formats = []
         for format_id, video_url in video.items():
             if not video_url:
@@ -108,18 +52,20 @@
             if not height:
                 continue
             formats.append({
-                'url': decrypt_url(video_url),
+                'url': self._proto_relative_url(
+                    video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
                 'format_id': format_id,
                 'height': int(height),
             })
         self._sort_formats(formats)
 
         title = video['title']
-        video_id = video.get('id') or video_id
+        video_id = compat_str(video.get('id') or video_id)
         display_id = video.get('code')
         description = video.get('desc')
+        series = video.get('ps_name')
 
-        timestamp = parse_iso8601(video.get('date'), ' ')
+        timestamp = unified_timestamp(video.get('date'))
         duration = int_or_none(video.get('duration'))
 
         tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
@@ -129,6 +75,7 @@
             'display_id': display_id,
             'title': title,
             'description': description,
+            'series': series,
             'timestamp': timestamp,
             'duration': duration,
             'tags': tags,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/bitchute.py new/youtube-dl/youtube_dl/extractor/bitchute.py
--- old/youtube-dl/youtube_dl/extractor/bitchute.py     2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/bitchute.py     2019-01-02 17:52:15.000000000 +0100
@@ -5,7 +5,10 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import (
+    orderedSet,
+    urlencode_postdata,
+)
 
 
 class BitChuteIE(InfoExtractor):
@@ -43,10 +46,15 @@
             'description', webpage, 'title',
             default=None) or self._og_search_description(webpage)
 
+        format_urls = []
+        for mobj in re.finditer(
+                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+            format_urls.append(mobj.group('url'))
+        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
         formats = [
-            {'url': mobj.group('url')}
-            for mobj in re.finditer(
-                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+            {'url': format_url}
+            for format_url in orderedSet(format_urls)]
         self._sort_formats(formats)
 
         description = self._html_search_regex(
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/cammodels.py new/youtube-dl/youtube_dl/extractor/cammodels.py
--- old/youtube-dl/youtube_dl/extractor/cammodels.py    2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/cammodels.py    2019-01-02 17:52:15.000000000 +0100
@@ -14,6 +14,7 @@
     _TESTS = [{
         'url': 'https://www.cammodels.com/cam/AutumnKnight/',
         'only_matching': True,
+        'age_limit': 18
     }]
 
     def _real_extract(self, url):
@@ -93,4 +94,5 @@
             'title': self._live_title(user_id),
             'is_live': True,
             'formats': formats,
+            'age_limit': 18
         }
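[The bitchute hunk above now gathers candidate format URLs from two sources and funnels them through orderedSet from youtube_dl/utils.py, which drops duplicates while keeping first-seen order, so re-listed web seeds do not produce duplicate formats. The idea, as an illustrative reimplementation:

    def ordered_set(iterable):
        # Deduplicate while preserving first-seen order.
        res = []
        for item in iterable:
            if item not in res:
                res.append(item)
        return res

    print(ordered_set(['a.mp4', 'b.mp4', 'a.mp4']))  # ['a.mp4', 'b.mp4']
]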
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/camtube.py new/youtube-dl/youtube_dl/extractor/camtube.py
--- old/youtube-dl/youtube_dl/extractor/camtube.py      2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/camtube.py      2019-01-02 17:52:15.000000000 +0100
@@ -20,6 +20,7 @@
             'duration': 1274,
             'timestamp': 1528018608,
             'upload_date': '20180603',
+            'age_limit': 18
         },
         'params': {
             'skip_download': True,
@@ -66,4 +67,5 @@
             'like_count': like_count,
             'creator': creator,
             'formats': formats,
+            'age_limit': 18
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/camwithher.py new/youtube-dl/youtube_dl/extractor/camwithher.py
--- old/youtube-dl/youtube_dl/extractor/camwithher.py   2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/camwithher.py   2019-01-02 17:52:15.000000000 +0100
@@ -25,6 +25,7 @@
             'comment_count': int,
             'uploader': 'MileenaK',
             'upload_date': '20160322',
+            'age_limit': 18,
         },
         'params': {
             'skip_download': True,
@@ -84,4 +85,5 @@
             'comment_count': comment_count,
             'uploader': uploader,
             'upload_date': upload_date,
+            'age_limit': 18
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/cnn.py new/youtube-dl/youtube_dl/extractor/cnn.py
--- old/youtube-dl/youtube_dl/extractor/cnn.py  2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/cnn.py  2019-01-02 17:52:15.000000000 +0100
@@ -119,11 +119,7 @@
     def _real_extract(self, url):
         webpage = self._download_webpage(url, url_basename(url))
         cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
-        return {
-            '_type': 'url',
-            'url': cnn_url,
-            'ie_key': CNNIE.ie_key(),
-        }
+        return self.url_result(cnn_url, CNNIE.ie_key())
 
 
 class CNNArticleIE(InfoExtractor):
@@ -145,8 +141,4 @@
     def _real_extract(self, url):
         webpage = self._download_webpage(url, url_basename(url))
         cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
-        return {
-            '_type': 'url',
-            'url': 'http://cnn.com/video/?/video/' + cnn_url,
-            'ie_key': CNNIE.ie_key(),
-        }
+        return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/crackle.py new/youtube-dl/youtube_dl/extractor/crackle.py
--- old/youtube-dl/youtube_dl/extractor/crackle.py      2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/crackle.py      2019-01-02 17:52:02.000000000 +0100
@@ -48,6 +48,21 @@
         'only_matching': True,
     }]
 
+    _MEDIA_FILE_SLOTS = {
+        '360p.mp4': {
+            'width': 640,
+            'height': 360,
+        },
+        '480p.mp4': {
+            'width': 768,
+            'height': 432,
+        },
+        '480p_1mbps.mp4': {
+            'width': 852,
+            'height': 480,
+        },
+    }
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -95,6 +110,20 @@
                 elif ext == 'mpd':
                     formats.extend(self._extract_mpd_formats(
                         format_url, video_id, mpd_id='dash', fatal=False))
+                elif format_url.endswith('.ism/Manifest'):
+                    formats.extend(self._extract_ism_formats(
+                        format_url, video_id, ism_id='mss', fatal=False))
+                else:
+                    mfs_path = e.get('Type')
+                    mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+                    if not mfs_info:
+                        continue
+                    formats.append({
+                        'url': format_url,
+                        'format_id': 'http-' + mfs_path.split('.')[0],
+                        'width': mfs_info['width'],
+                        'height': mfs_info['height'],
+                    })
         self._sort_formats(formats)
 
         description = media.get('Description')
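[In the crackle hunk above, stream URLs that are not HLS/DASH/ISM manifests fall back to the static _MEDIA_FILE_SLOTS table: the entry's 'Type' field selects the dimensions and doubles as the format label. Traced through by hand for one slot (the URL below is made up for illustration):

    mfs_path = '480p_1mbps.mp4'               # e.get('Type')
    mfs_info = {'width': 852, 'height': 480}  # _MEDIA_FILE_SLOTS[mfs_path]
    fmt = {
        'url': 'https://example.invalid/v/480p_1mbps.mp4',
        'format_id': 'http-' + mfs_path.split('.')[0],  # 'http-480p_1mbps'
        'width': mfs_info['width'],
        'height': mfs_info['height'],
    }
]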
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/discovery.py new/youtube-dl/youtube_dl/extractor/discovery.py
--- old/youtube-dl/youtube_dl/extractor/discovery.py    2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/discovery.py    2019-01-02 17:52:15.000000000 +0100
@@ -17,16 +17,29 @@
 
 
 class DiscoveryIE(DiscoveryGoBaseIE):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
-            discovery|
-            investigationdiscovery|
-            discoverylife|
-            animalplanet|
-            ahctv|
-            destinationamerica|
-            sciencechannel|
-            tlc|
-            velocity
+    _VALID_URL = r'''(?x)https?://
+        (?P<site>
+            (?:www\.)?
+                (?:
+                    discovery|
+                    investigationdiscovery|
+                    discoverylife|
+                    animalplanet|
+                    ahctv|
+                    destinationamerica|
+                    sciencechannel|
+                    tlc|
+                    velocity
+                )|
+            watch\.
+                (?:
+                    hgtv|
+                    foodnetwork|
+                    travelchannel|
+                    diynetwork|
+                    cookingchanneltv|
+                    motortrend
+                )
         )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
     _TESTS = [{
         'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
@@ -71,7 +84,7 @@
 
         if not access_token:
             access_token = self._download_json(
-                'https://www.%s.com/anonymous' % site, display_id, query={
+                'https://%s.com/anonymous' % site, display_id, query={
                     'authRel': 'authorization',
                     'client_id': try_get(
                         react_data, lambda x: x['application']['apiClientId'],
@@ -81,11 +94,12 @@
                 })['access_token']
 
         try:
+            headers = self.geo_verification_headers()
+            headers['Authorization'] = 'Bearer ' + access_token
+
             stream = self._download_json(
                 'https://api.discovery.com/v1/streaming/video/' + video_id,
-                display_id, headers={
-                    'Authorization': 'Bearer ' + access_token,
-                })
+                display_id, headers=headers)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                 e_description = self._parse_json(
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/extractors.py new/youtube-dl/youtube_dl/extractor/extractors.py
--- old/youtube-dl/youtube_dl/extractor/extractors.py   2018-12-16 23:36:43.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/extractors.py   2019-01-02 17:52:03.000000000 +0100
@@ -557,6 +557,7 @@
 from .lecturio import (
     LecturioIE,
     LecturioCourseIE,
+    LecturioDeCourseIE,
 )
 from .leeco import (
     LeIE,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/freespeech.py new/youtube-dl/youtube_dl/extractor/freespeech.py
--- old/youtube-dl/youtube_dl/extractor/freespeech.py   2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/freespeech.py   2019-01-02 17:52:15.000000000 +0100
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 
 
 class FreespeechIE(InfoExtractor):
@@ -27,8 +28,4 @@
             r'data-video-url="([^"]+)"',
             webpage, 'youtube url')
 
-        return {
-            '_type': 'url',
-            'url': youtube_url,
-            'ie_key': 'Youtube',
-        }
+        return self.url_result(youtube_url, YoutubeIE.ie_key())
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/generic.py new/youtube-dl/youtube_dl/extractor/generic.py
--- old/youtube-dl/youtube_dl/extractor/generic.py      2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/generic.py      2019-01-02 17:52:15.000000000 +0100
@@ -2197,10 +2197,7 @@
 
     def _real_extract(self, url):
         if url.startswith('//'):
-            return {
-                '_type': 'url',
-                'url': self.http_scheme() + url,
-            }
+            return self.url_result(self.http_scheme() + url)
 
         parsed_url = compat_urlparse.urlparse(url)
         if not parsed_url.scheme:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/lecturio.py new/youtube-dl/youtube_dl/extractor/lecturio.py
--- old/youtube-dl/youtube_dl/extractor/lecturio.py     2018-12-16 23:36:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/lecturio.py     2019-01-02 17:52:03.000000000 +0100
@@ -64,8 +64,14 @@
 
 
 class LecturioIE(LecturioBaseIE):
-    _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https://
+                        (?:
+                            app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
+                            (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
+                        )
+                    '''
+    _TESTS = [{
         'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
         'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
         'info_dict': {
@@ -74,7 +80,10 @@
             'title': 'Important Concepts and Terms – Introduction to Microbiology',
         },
         'skip': 'Requires lecturio account credentials',
-    }
+    }, {
+        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+        'only_matching': True,
+    }]
 
     _CC_LANGS = {
         'German': 'de',
@@ -86,7 +95,8 @@
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id') or mobj.group('id_de')
 
         webpage = self._download_webpage(
             'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
@@ -190,3 +200,30 @@
             'title', default=None)
 
         return self.playlist_result(entries, display_id, title)
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+    _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+    _TEST = {
+        'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        entries = []
+        for mobj in re.finditer(
+                r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+                webpage):
+            lecture_url = urljoin(url, mobj.group('url'))
+            lecture_id = mobj.group('id')
+            entries.append(self.url_result(
+                lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+        title = self._search_regex(
+            r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+        return self.playlist_result(entries, display_id, title)
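[The merged LecturioIE pattern above carries two distinct named groups, and _real_extract picks whichever one matched; checked standalone against the test URLs from the hunk:

    import re

    LECTURIO_RE = r'''(?x)
        https://
        (?:
            app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
            (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
        )
    '''

    for url in (
            'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
            'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag'):
        mobj = re.match(LECTURIO_RE, url)
        print(mobj.group('id') or mobj.group('id_de'))
    # -> important-concepts-and-terms-introduction-to-microbiology
    # -> oeffentliches-recht-staatsexamen
]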
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/liveleak.py new/youtube-dl/youtube_dl/extractor/liveleak.py
--- old/youtube-dl/youtube_dl/extractor/liveleak.py     2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/liveleak.py     2019-01-02 17:52:03.000000000 +0100
@@ -87,7 +87,7 @@
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
-            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
+            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
             webpage)
 
     def _real_extract(self, url):
@@ -120,13 +120,27 @@
         }
 
         for idx, info_dict in enumerate(entries):
+            formats = []
             for a_format in info_dict['formats']:
                 if not a_format.get('height'):
                     a_format['height'] = int_or_none(self._search_regex(
                         r'([0-9]+)p\.mp4', a_format['url'], 'height label',
                         default=None))
+                formats.append(a_format)
 
-            self._sort_formats(info_dict['formats'])
+                # Removing '.*.mp4' gives the raw video, which is essentially
+                # the same video without the LiveLeak logo at the top (see
+                # https://github.com/rg3/youtube-dl/pull/4768)
+                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
+                if a_format['url'] != orig_url:
+                    format_id = a_format.get('format_id')
+                    formats.append({
+                        'format_id': 'original' + ('-' + format_id if format_id else ''),
+                        'url': orig_url,
+                        'preference': 1,
+                    })
+            self._sort_formats(formats)
+            info_dict['formats'] = formats
 
         # Don't append entry ID for one-video pages to keep backward compatibility
         if len(entries) > 1:
@@ -146,7 +160,7 @@
 
 
 class LiveLeakEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
 
     # See generic.py for actual test cases
     _TESTS = [{
@@ -158,15 +172,14 @@
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        kind, video_id = mobj.group('kind', 'id')
+        kind, video_id = re.match(self._VALID_URL, url).groups()
 
         if kind == 'f':
             webpage = self._download_webpage(url, video_id)
             liveleak_url = self._search_regex(
-                r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                 webpage, 'LiveLeak URL', group='url')
-        elif kind == 'i':
-            liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
+        else:
+            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
 
         return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/livestream.py new/youtube-dl/youtube_dl/extractor/livestream.py
--- old/youtube-dl/youtube_dl/extractor/livestream.py   2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/livestream.py   2019-01-02 17:52:15.000000000 +0100
@@ -363,7 +363,4 @@
         id = mobj.group('id')
         webpage = self._download_webpage(url, id)
 
-        return {
-            '_type': 'url',
-            'url': self._og_search_url(webpage),
-        }
+        return self.url_result(self._og_search_url(webpage))
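[The "original format" recovery in the liveleak hunk above rests on one substitution: stripping the '.mp4.<token>' infix yields the logo-free source file. With a made-up URL of that shape:

    import re

    url = 'https://example.invalid/ll/clip_h264_720p.mp4.5c2a91b7.mp4'
    orig_url = re.sub(r'\.mp4\.[^.]+', '', url)
    print(orig_url)  # https://example.invalid/ll/clip_h264_720p.mp4
]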
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/manyvids.py new/youtube-dl/youtube_dl/extractor/manyvids.py
--- old/youtube-dl/youtube_dl/extractor/manyvids.py     2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/manyvids.py     2019-01-02 17:52:15.000000000 +0100
@@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    str_to_int,
+    urlencode_postdata,
+)
 
 
 class ManyVidsIE(InfoExtractor):
     _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
+        # preview video
         'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
         'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
         'info_dict': {
@@ -17,7 +23,18 @@
             'view_count': int,
             'like_count': int,
         },
-    }
+    }, {
+        # full video
+        'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
+        'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+        'info_dict': {
+            'id': '935718',
+            'ext': 'mp4',
+            'title': 'MY FACE REVEAL',
+            'view_count': int,
+            'like_count': int,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -28,12 +45,41 @@
             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             webpage, 'video URL', group='url')
 
-        title = '%s (Preview)' % self._html_search_regex(
-            r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
+        title = self._html_search_regex(
+            (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+             r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+            webpage, 'title', default=None) or self._html_search_meta(
+            'twitter:title', webpage, 'title', fatal=True)
+
+        if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
+            title += ' (Preview)'
+
+        mv_token = self._search_regex(
+            r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'mv token', default=None, group='value')
+
+        if mv_token:
+            # Sets some cookies
+            self._download_webpage(
+                'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
+                video_id, fatal=False, data=urlencode_postdata({
+                    'mvtoken': mv_token,
+                    'vid': video_id,
+                }), headers={
+                    'Referer': url,
+                    'X-Requested-With': 'XMLHttpRequest'
+                })
+
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+        else:
+            formats = [{'url': video_url}]
 
         like_count = int_or_none(self._search_regex(
             r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
-        view_count = int_or_none(self._html_search_regex(
+        view_count = str_to_int(self._html_search_regex(
             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span',
             webpage, 'view count', default=None))
 
@@ -42,7 +88,5 @@
             'title': title,
             'view_count': view_count,
             'like_count': like_count,
-            'formats': [{
-                'url': video_url,
-            }],
+            'formats': formats,
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mediasite.py new/youtube-dl/youtube_dl/extractor/mediasite.py
--- old/youtube-dl/youtube_dl/extractor/mediasite.py    2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/mediasite.py    2019-01-02 17:52:03.000000000 +0100
@@ -21,7 +21,7 @@
 
 
 class MediasiteIE(InfoExtractor):
-    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
     _TESTS = [
         {
             'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
@@ -84,7 +84,15 @@
                 'timestamp': 1333983600,
                 'duration': 7794,
             }
-        }
+        },
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+            'only_matching': True,
+        },
     ]
 
     # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
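[The widened Mediasite pattern above now also accepts Showcase presentation pages; both new only_matching URLs from the hunk match and yield the 34-hex-digit id (standalone check):

    import re

    MEDIASITE_RE = (r'(?xi)https?://[^/]+/Mediasite/'
                    r'(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/'
                    r'(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)')

    for url in (
            'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
            'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d'):
        print(re.match(MEDIASITE_RE, url).group('id'))
]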
 
    _TESTS = [{
        'url': 'http://www.npo.nl/live/npo-1',
@@ -380,6 +380,9 @@
    }, {
        'url': 'http://www.npo.nl/live',
        'only_matching': True,
+    }, {
+        'url': 'https://www.npostart.nl/live/npo-1',
+        'only_matching': True,
    }]
 
    def _real_extract(self, url):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/packtpub.py new/youtube-dl/youtube_dl/extractor/packtpub.py
--- old/youtube-dl/youtube_dl/extractor/packtpub.py     2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/packtpub.py     2019-01-02 17:52:15.000000000 +0100
@@ -24,9 +24,9 @@
 
 
 class PacktPubIE(PacktPubBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
         'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
         'info_dict': {
@@ -37,7 +37,10 @@
             'timestamp': 1490918400,
             'upload_date': '20170331',
         },
-    }
+    }, {
+        'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
+        'only_matching': True,
+    }]
     _NETRC_MACHINE = 'packtpub'
     _TOKEN = None
 
@@ -110,15 +113,18 @@
 
 
 class PacktPubCourseIE(PacktPubBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
-    _TEST = {
+    _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
+    _TESTS = [{
         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
         'info_dict': {
             'id': '9781787122215',
             'title': 'Learn Nodejs by building 12 projects [Video]',
         },
         'playlist_count': 90,
-    }
+    }, {
+        'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
+        'only_matching': True,
+    }]
 
     @classmethod
     def suitable(cls, url):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rmcdecouverte.py new/youtube-dl/youtube_dl/extractor/rmcdecouverte.py
--- old/youtube-dl/youtube_dl/extractor/rmcdecouverte.py        2018-12-16 23:36:44.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/rmcdecouverte.py        2019-01-02 17:52:15.000000000 +0100
@@ -1,38 +1,46 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
 from ..compat import (
     compat_parse_qs,
     compat_urlparse,
 )
+from ..utils import smuggle_url
 
 
 class RMCDecouverteIE(InfoExtractor):
-    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
+    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
 
-    _TEST = {
-        'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
+    _TESTS = [{
+        'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
         'info_dict': {
-            'id': '5419055995001',
+            'id': '5983675500001',
             'ext': 'mp4',
-            'title': 'UN DELICIEUX PROJET',
-            'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
+            'title': 'CORVETTE',
+            'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
             'uploader_id': '1969646226001',
-            'upload_date': '20170502',
-            'timestamp': 1493745308,
+            'upload_date': '20181226',
+            'timestamp': 1545861635,
         },
         'params': {
            'skip_download': True,
        },
        'skip': 'only available for a week',
-    }
+    }, {
+        # live, geo restricted, bypassable
+        'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
+        'only_matching': True,
+    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
 
    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id') or mobj.group('live_id')
+        webpage = self._download_webpage(url, display_id)
        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
        if brightcove_legacy_url:
            brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
@@ -41,5 +49,7 @@
            brightcove_id = self._search_regex(
                r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
        return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
-            brightcove_id)
+            smuggle_url(
+                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+                {'geo_countries': ['FR']}),
+            'BrightcoveNew', brightcove_id)
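[The rmcdecouverte hunk above forwards geo_countries to the Brightcove extractor via smuggle_url from youtube_dl/utils.py, which piggybacks a JSON payload on the URL fragment so the receiving extractor can unsmuggle it later. Schematically — a simplified approximation of the real helper pair, Python 3 syntax, with a hypothetical videoId:

    import json
    from urllib.parse import quote, unquote

    def smuggle_url(url, data):
        # Stash extra data for the next extractor in the fragment.
        return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

    def unsmuggle_url(smug_url, default=None):
        if '#__youtubedl_smuggle' not in smug_url:
            return smug_url, default
        url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
        return url, json.loads(unquote(payload))

    url = smuggle_url(
        'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=123',
        {'geo_countries': ['FR']})
    print(unsmuggle_url(url))
]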
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/savefrom.py new/youtube-dl/youtube_dl/extractor/savefrom.py
--- old/youtube-dl/youtube_dl/extractor/savefrom.py     2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/savefrom.py     2019-01-02 17:52:15.000000000 +0100
@@ -30,8 +30,5 @@
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = os.path.splitext(url.split('/')[-1])[0]
-        return {
-            '_type': 'url',
-            'id': video_id,
-            'url': mobj.group('url'),
-        }
+
+        return self.url_result(mobj.group('url'), video_id=video_id)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/scrippsnetworks.py new/youtube-dl/youtube_dl/extractor/scrippsnetworks.py
--- old/youtube-dl/youtube_dl/extractor/scrippsnetworks.py      2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/scrippsnetworks.py      2019-01-02 17:52:04.000000000 +0100
@@ -19,7 +19,7 @@
     _VALID_URL = r'''(?x)
                     https?://
                         watch\.
-                        (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
+                        (?P<site>geniuskitchen)\.com/
                         (?:
                             player\.[A-Z0-9]+\.html\#|
                             show/(?:[^/]+/){2}|
@@ -28,38 +28,23 @@
                         (?P<id>\d+)
                     '''
     _TESTS = [{
-        'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
-        'md5': '26545fd676d939954c6808274bdb905a',
+        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
         'info_dict': {
-            'id': '4173834',
+            'id': '4194875',
             'ext': 'mp4',
-            'title': 'Best Ever Treehouses',
-            'description': "We're searching for the most over the top treehouses.",
+            'title': 'Ample Hills Ice Cream Bike',
+            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
             'uploader': 'ANV',
-            'upload_date': '20170922',
-            'timestamp': 1506056400,
+            'upload_date': '20171011',
+            'timestamp': 1507698000,
         },
         'params': {
             'skip_download': True,
         },
         'add_ie': [AnvatoIE.ie_key()],
-    }, {
-        'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
-        'only_matching': True,
-    }, {
-        'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
-        'only_matching': True,
-    }, {
-        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
-        'only_matching': True,
     }]
 
     _SNI_TABLE = {
-        'hgtv': 'hgtv',
-        'diynetwork': 'diy',
-        'foodnetwork': 'food',
-        'cookingchanneltv': 'cook',
-        'travelchannel': 'trav',
         'geniuskitchen': 'genius',
     }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/ted.py new/youtube-dl/youtube_dl/extractor/ted.py
--- old/youtube-dl/youtube_dl/extractor/ted.py  2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/ted.py  2019-01-02 17:52:15.000000000 +0100
@@ -203,10 +203,8 @@
             ext_url = None
             if service.lower() == 'youtube':
                 ext_url = external.get('code')
-            return {
-                '_type': 'url',
-                'url': ext_url or external['uri'],
-            }
+
+            return self.url_result(ext_url or external['uri'])
 
         resources_ = player_talk.get('resources') or talk_info.get('resources')
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/testurl.py new/youtube-dl/youtube_dl/extractor/testurl.py
--- old/youtube-dl/youtube_dl/extractor/testurl.py      2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/testurl.py      2019-01-02 17:52:15.000000000 +0100
@@ -61,8 +61,4 @@
 
         self.to_screen('Test URL: %s' % tc['url'])
 
-        return {
-            '_type': 'url',
-            'url': tc['url'],
-            'id': video_id,
-        }
+        return self.url_result(tc['url'], video_id=video_id)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/twitter.py new/youtube-dl/youtube_dl/extractor/twitter.py
--- old/youtube-dl/youtube_dl/extractor/twitter.py      2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/twitter.py      2019-01-02 17:52:05.000000000 +0100
@@ -171,7 +171,8 @@
             urls.append('https://twitter.com/i/videos/' + video_id)
 
         for u in urls:
-            webpage = self._download_webpage(u, video_id)
+            webpage = self._download_webpage(
+                u, video_id, headers={'Referer': 'https://twitter.com/'})
 
             iframe_url = self._html_search_regex(
                 r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/wimp.py new/youtube-dl/youtube_dl/extractor/wimp.py
--- old/youtube-dl/youtube_dl/extractor/wimp.py 2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/wimp.py 2019-01-02 17:52:15.000000000 +0100
@@ -40,11 +40,7 @@
              r'data-id=["\']([0-9A-Za-z_-]{11})'),
             webpage, 'video URL', default=None)
         if youtube_id:
-            return {
-                '_type': 'url',
-                'url': youtube_id,
-                'ie_key': YoutubeIE.ie_key(),
-            }
+            return self.url_result(youtube_id, YoutubeIE.ie_key())
 
         info_dict = self._extract_jwplayer_data(
             webpage, video_id, require_title=False)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/yourporn.py new/youtube-dl/youtube_dl/extractor/yourporn.py
--- old/youtube-dl/youtube_dl/extractor/yourporn.py     2018-12-16 23:36:45.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/yourporn.py     2019-01-02 17:52:15.000000000 +0100
@@ -14,6 +14,7 @@
             'ext': 'mp4',
             'title': 'md5:c9f43630bd968267672651ba905a7d35',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'age_limit': 18
         },
     }
 
@@ -26,7 +27,7 @@
             self._search_regex(
                 r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
                 group='data'),
-            video_id)[video_id]).replace('/cdn/', '/cdn2/')
+            video_id)[video_id]).replace('/cdn/', '/cdn3/')
 
         title = (self._search_regex(
             r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
@@ -38,4 +39,5 @@
             'url': video_url,
             'title': title,
             'thumbnail': thumbnail,
+            'age_limit': 18
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py
--- old/youtube-dl/youtube_dl/extractor/youtube.py      2018-12-16 23:36:54.000000000 +0100
+++ new/youtube-dl/youtube_dl/extractor/youtube.py      2019-01-02 17:52:05.000000000 +0100
@@ -1077,6 +1077,11 @@
             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
             'only_matching': True,
         },
+        {
+            # DRM protected
+            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+            'only_matching': True,
+        }
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1105,7 +1110,7 @@
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1673,6 +1678,9 @@
                     '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
+        if video_info.get('license_info'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}
 
@@ -1786,6 +1794,25 @@
                     'height': int_or_none(width_height[1]),
                 }
             q = qualities(['small', 'medium', 'hd720'])
+            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
+            if streaming_formats:
+                for fmt in streaming_formats:
+                    itag = str_or_none(fmt.get('itag'))
+                    if not itag:
+                        continue
+                    quality = fmt.get('quality')
+                    quality_label = fmt.get('qualityLabel') or quality
+                    formats_spec[itag] = {
+                        'asr': int_or_none(fmt.get('audioSampleRate')),
+                        'filesize': int_or_none(fmt.get('contentLength')),
+                        'format_note': quality_label,
+                        'fps': int_or_none(fmt.get('fps')),
+                        'height': int_or_none(fmt.get('height')),
+                        'quality': q(quality),
+                        # bitrate for itag 43 is always 2147483647
+                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+                        'width': int_or_none(fmt.get('width')),
+                    }
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
@@ -1834,7 +1861,7 @@
                 else:
                     player_version = self._search_regex(
                         [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                         r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
+                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                         player_url, 'html5 player', fatal=False)
                     player_desc = 'html5 player %s' % player_version
 
@@ -1868,7 +1895,7 @@
                     filesize = int_or_none(url_data.get(
                         'clen', [None])[0]) or _extract_filesize(url)
 
-                    quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+                    quality = url_data.get('quality', [None])[0]
 
                     more_fields = {
                         'filesize': filesize,
@@ -1876,7 +1903,7 @@
                         'width': width,
                         'height': height,
                         'fps': int_or_none(url_data.get('fps', [None])[0]),
-                        'format_note': quality,
+                        'format_note': url_data.get('quality_label', [None])[0] or quality,
                         'quality': q(quality),
                     }
                     for key, value in more_fields.items():
@@ -2016,7 +2043,7 @@
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
             video_webpage)
         if m_episode:
-            series = m_episode.group('series')
+            series = unescapeHTML(m_episode.group('series'))
             season_number = int(m_episode.group('season'))
             episode_number = int(m_episode.group('episode'))
         else:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py
--- old/youtube-dl/youtube_dl/version.py        2018-12-16 23:37:46.000000000 +0100
+++ new/youtube-dl/youtube_dl/version.py        2019-01-02 17:52:51.000000000 +0100
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2018.12.17'
+__version__ = '2019.01.02'
