Hello community, here is the commit log for package you-get in openSUSE:Factory, checked in at 2019-05-09 14:28:28 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.5148 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Thu May 9 14:28:28 2019 rev:22 rq:701737 version:0.4.1302 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2019-04-26 22:54:32.737311443 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new.5148/you-get.changes 2019-05-09 14:28:31.873955934 +0200 @@ -1,0 +2,5 @@ +Thu May 9 08:29:12 UTC 2019 - Luigi Baldoni <[email protected]> + +- Update to version 0.4.1302 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1295.tar.gz New: ---- you-get-0.4.1302.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.qbtyJV/_old 2019-05-09 14:28:32.269956850 +0200 +++ /var/tmp/diff_new_pack.qbtyJV/_new 2019-05-09 14:28:32.273956860 +0200 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1295 +Version: 0.4.1302 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1295.tar.gz -> you-get-0.4.1302.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/.travis.yml new/you-get-0.4.1302/.travis.yml --- old/you-get-0.4.1295/.travis.yml 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/.travis.yml 2019-05-09 01:43:06.000000000 +0200 @@ -1,8 +1,6 @@ # https://travis-ci.org/soimort/you-get language: python python: - - "3.2" - - "3.3" - "3.4" - "3.5" - "3.6" @@ -12,13 +10,13 @@ - python: "3.7" dist: xenial - python: "3.8-dev" - dist: xenial + dist: xenial - python: "nightly" - dist: xenial + dist: xenial before_install: - - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then pip install flake8; fi + - pip install flake8 before_script: - - if [[ $TRAVIS_PYTHON_VERSION != '3.2'* && $TRAVIS_PYTHON_VERSION != '3.3'* ]]; then flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics; fi + - flake8 . 
--count --select=E9,F63,F72,F82 --show-source --statistics script: make test notifications: webhooks: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/src/you_get/extractors/embed.py new/you-get-0.4.1302/src/you_get/extractors/embed.py --- old/you-get-0.4.1295/src/you_get/extractors/embed.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/src/you_get/extractors/embed.py 2019-05-09 01:43:06.000000000 +0200 @@ -133,7 +133,7 @@ r = 1 else: r += 1 - iframes = matchall(content, [r'<iframe.+?src=(?:\"|\')(.+?)(?:\"|\')']) + iframes = matchall(content, [r'<iframe.+?src=(?:\"|\')(.*?)(?:\"|\')']) for iframe in iframes: if not iframe.startswith('http'): src = urllib.parse.urljoin(url, iframe) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/src/you_get/extractors/miaopai.py new/you-get-0.4.1302/src/you_get/extractors/miaopai.py --- old/you-get-0.4.1295/src/you_get/extractors/miaopai.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/src/you_get/extractors/miaopai.py 2019-05-09 01:43:06.000000000 +0200 @@ -65,26 +65,47 @@ download_urls([video_url], fs.legitimize(title), ext, headers=headers, **kwargs) -def miaopai_download_direct(url, info_only, **kwargs): +def miaopai_download_story(url, output_dir='.', merge=False, info_only=False, **kwargs): + data_url = 'https://m.weibo.cn/s/video/object?%s' % url.split('?')[1] + data_content = get_content(data_url, headers=fake_headers_mobile) + data = json.loads(data_content) + title = data['data']['object']['summary'] + stream_url = data['data']['object']['stream']['url'] + + ext = 'mp4' + print_info(site_info, title, ext, url_info(stream_url, headers=fake_headers_mobile)[2]) + if not info_only: + download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs) + + +def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, 
**kwargs): mobile_page = get_content(url, headers=fake_headers_mobile) try: title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) except: title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) title = title.replace('\n', '_') - stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) + try: + stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) + except: + page_url = re.search(r'([\'"])page_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) + return miaopai_download_story(page_url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + ext = 'mp4' print_info(site_info, title, ext, url_info(stream_url, headers=fake_headers_mobile)[2]) if not info_only: download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs) -# ---------------------------------------------------------------------- -def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): - if match1(url, r'weibo\.com/tv/v/(\w+)'): +def miaopai_download(url, output_dir='.', merge=False, info_only=False, **kwargs): + if re.match(r'^http[s]://.*\.weibo\.com/\d+/.+', url): return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) - if re.match(r'^http[s]://.*\.weibo\.com/\d+/.+', url): + if re.match(r'^http[s]://.*\.weibo\.(com|cn)/s/video/.+', url): + return miaopai_download_story(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + + # FIXME! 
+ if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url): return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) fid = match1(url, r'\?fid=(\d{4}:\w+)') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/src/you_get/extractors/universal.py new/you-get-0.4.1302/src/you_get/extractors/universal.py --- old/you-get-0.4.1295/src/you_get/extractors/universal.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/src/you_get/extractors/universal.py 2019-05-09 01:43:06.000000000 +0200 @@ -33,27 +33,35 @@ meta_videos = re.findall(r'<meta property="og:video:url" content="([^"]*)"', page) if meta_videos: - for meta_video in meta_videos: - meta_video_url = unescape_html(meta_video) - type_, ext, size = url_info(meta_video_url) - print_info(site_info, page_title, type_, size) - if not info_only: - download_urls([meta_video_url], page_title, - ext, size, - output_dir=output_dir, merge=merge, - faker=True) - return + try: + for meta_video in meta_videos: + meta_video_url = unescape_html(meta_video) + type_, ext, size = url_info(meta_video_url) + print_info(site_info, page_title, type_, size) + if not info_only: + download_urls([meta_video_url], page_title, + ext, size, + output_dir=output_dir, merge=merge, + faker=True) + except: + pass + else: + return hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' 
+ r'[^;"\'\\]*)', page) if hls_urls: - for hls_url in hls_urls: - type_, ext, size = url_info(hls_url) - print_info(site_info, page_title, type_, size) - if not info_only: - download_url_ffmpeg(url=hls_url, title=page_title, - ext='mp4', output_dir=output_dir) - return + try: + for hls_url in hls_urls: + type_, ext, size = url_info(hls_url) + print_info(site_info, page_title, type_, size) + if not info_only: + download_url_ffmpeg(url=hls_url, title=page_title, + ext='mp4', output_dir=output_dir) + except: + pass + else: + return # most common media file extensions on the Internet media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm', @@ -67,12 +75,12 @@ urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^ ;&"\'\\]+' + i + r'[^ ;&"\'\\]*)', page) + urls += re.findall(r'(https?://[^ ;&"\'\\<>]+' + i + r'[^ ;&"\'\\<>]*)', page) p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page) urls += [parse.unquote(url) for url in p_urls] - q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\']+' + i + r'[^ ;"\']*)', page) + q_urls = re.findall(r'(https?:\\\\/\\\\/[^ ;"\'<>]+' + i + r'[^ ;"\'<>]*)', page) urls += [url.replace('\\\\/', '/') for url in q_urls] # a link href to an image is often an interesting one diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/src/you_get/extractors/youtube.py new/you-get-0.4.1302/src/you_get/extractors/youtube.py --- old/you-get-0.4.1295/src/you_get/extractors/youtube.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/src/you_get/extractors/youtube.py 2019-05-09 01:43:06.000000000 +0200 @@ -109,7 +109,7 @@ else: f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js) f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2)) - f2 = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', f2) + f2 = re.sub(r'(as|if|in|is|or)', r'_\1', f2) f2 = re.sub(r'\$', '_dollar', f2) code = code + 'global %s\n' % f2 + tr_js(f2def) diff -urN 
'--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/src/you_get/version.py new/you-get-0.4.1302/src/you_get/version.py --- old/you-get-0.4.1295/src/you_get/version.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/src/you_get/version.py 2019-05-09 01:43:06.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1295' +__version__ = '0.4.1302' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1295/tests/test.py new/you-get-0.4.1302/tests/test.py --- old/you-get-0.4.1295/tests/test.py 2019-04-25 16:23:50.000000000 +0200 +++ new/you-get-0.4.1302/tests/test.py 2019-05-09 01:43:06.000000000 +0200 @@ -32,9 +32,6 @@ info_only=True ) - def test_toutiao(self): - toutiao.download('https://www.365yg.com/i6640053613567675662/#mid=1611922564114440', info_only=True) - if __name__ == '__main__': unittest.main()
