Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2020-07-20 21:01:29
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new.3592 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get"

Mon Jul 20 21:01:29 2020 rev:32 rq:821769 version:0.4.1456

Changes:
--------
--- /work/SRC/openSUSE:Factory/you-get/you-get.changes	2020-03-30 23:03:56.208194749 +0200
+++ /work/SRC/openSUSE:Factory/.you-get.new.3592/you-get.changes	2020-07-20 21:03:06.521198610 +0200
@@ -1,0 +2,5 @@
+Sun Jul 19 17:19:23 UTC 2020 - [email protected]
+
+- Update to version 0.4.1456 (no changelog supplied)
+
+-------------------------------------------------------------------

Old:
----
  you-get-0.4.1432.tar.gz

New:
----
  you-get-0.4.1456.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ you-get.spec ++++++
--- /var/tmp/diff_new_pack.f4dm4r/_old	2020-07-20 21:03:09.805201939 +0200
+++ /var/tmp/diff_new_pack.f4dm4r/_new	2020-07-20 21:03:09.809201943 +0200
@@ -17,7 +17,7 @@
 
 Name:           you-get
-Version:        0.4.1432
+Version:        0.4.1456
 Release:        0
 Summary:        Dumb downloader that scrapes the web
 License:        MIT

++++++ you-get-0.4.1432.tar.gz -> you-get-0.4.1456.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/README.md new/you-get-0.4.1456/README.md
--- old/you-get-0.4.1432/README.md	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/README.md	2020-07-19 16:17:28.000000000 +0200
@@ -402,7 +402,7 @@
 | **AcFun** | <http://www.acfun.cn/> |✓| | |
 | **Baidu<br/>百度贴吧** | <http://tieba.baidu.com/> |✓|✓| |
 | 爆米花网 | <http://www.baomihua.com/> |✓| | |
-| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
+| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓|✓|✓|
 | 豆瓣 | <http://www.douban.com/> |✓| |✓|
 | 斗鱼 | <http://www.douyutv.com/> |✓| | |
 | 凤凰视频 | <http://v.ifeng.com/> |✓| | |
@@ -436,6 +436,7 @@
 | 火猫TV | <http://www.huomao.com/> |✓| | |
 | 阳光宽频网 | <http://www.365yg.com/> |✓| | |
 | 西瓜视频 | <https://www.ixigua.com/> |✓| | |
+| 新片场 | <https://www.xinpianchang.com//> |✓| | |
 | 快手 | <https://www.kuaishou.com/> |✓|✓| |
 | 抖音 | <https://www.douyin.com/> |✓| | |
 | TikTok | <https://www.tiktok.com/> |✓| | |
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/common.py new/you-get-0.4.1456/src/you_get/common.py
--- old/you-get-0.4.1432/src/you_get/common.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/common.py	2020-07-19 16:17:28.000000000 +0200
@@ -116,6 +116,7 @@
     'xiaokaxiu'        : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya'         : 'ximalaya',
+    'xinpianchang'     : 'xinpianchang',
     'yinyuetai'        : 'yinyuetai',
     'yizhibo'          : 'yizhibo',
     'youku'            : 'youku',
@@ -628,10 +629,12 @@
     if refer is not None:
         tmp_headers['Referer'] = refer
     if type(url) is list:
-        file_size = urls_size(url, faker=faker, headers=tmp_headers)
+        chunk_sizes = [url_size(url, faker=faker, headers=tmp_headers) for url in url]
+        file_size = sum(chunk_sizes)
         is_chunked, urls = True, url
     else:
         file_size = url_size(url, faker=faker, headers=tmp_headers)
+        chunk_sizes = [file_size]
         is_chunked, urls = False, [url]
 
     continue_renameing = True
@@ -695,9 +698,13 @@
         else:
             open_mode = 'wb'
 
-        for url in urls:
+        chunk_start = 0
+        chunk_end = 0
+        for i, url in enumerate(urls):
             received_chunk = 0
-            if received < file_size:
+            chunk_start += 0 if i == 0 else chunk_sizes[i - 1]
+            chunk_end += chunk_sizes[i]
+            if received < file_size and received < chunk_end:
                 if faker:
                     tmp_headers = fake_headers
                 '''
@@ -707,8 +714,9 @@
                 else:
                     headers = {}
                 '''
-                if received and not is_chunked:  # only request a range when not chunked
-                    tmp_headers['Range'] = 'bytes=' + str(received) + '-'
+                if received:
+                    # chunk_start will always be 0 if not chunked
+                    tmp_headers['Range'] = 'bytes=' + str(received - chunk_start) + '-'
                 if refer:
                     tmp_headers['Referer'] = refer
@@ -756,8 +764,7 @@
             elif not is_chunked and received == file_size:  # Download finished
                 break
             # Unexpected termination. Retry request
-            if not is_chunked:  # when
-                tmp_headers['Range'] = 'bytes=' + str(received) + '-'
+            tmp_headers['Range'] = 'bytes=' + str(received - chunk_start) + '-'
             response = urlopen_with_retry(
                 request.Request(url, headers=tmp_headers)
             )
@@ -1059,6 +1066,20 @@
             print('Merged into %s' % output_filename)
         except:
             raise
+        else:
+            for part in parts:
+                os.remove(part)
+
+    elif ext == 'mp3':
+        try:
+            from .processor.ffmpeg import has_ffmpeg_installed
+
+            assert has_ffmpeg_installed()
+            from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3
+            ffmpeg_concat_mp3_to_mp3(parts, output_filepath)
+            print('Merged into %s' % output_filename)
+        except:
+            raise
         else:
             for part in parts:
                 os.remove(part)
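
The common.py change above teaches the downloader to resume inside multi-URL ("chunked") downloads: it records each chunk's size up front, and the HTTP Range header it sends on resume is now relative to the start of the current chunk rather than to the whole file. A minimal sketch of that arithmetic, under a helper name of our own (not you-get API):

    def range_for_resume(chunk_sizes, received):
        # 'received' counts bytes across the whole download
        chunk_start = 0
        for size in chunk_sizes:
            chunk_end = chunk_start + size
            if received < chunk_end:
                # the offset is relative to the current chunk;
                # chunk_start stays 0 for a single-URL download
                return 'bytes=%d-' % (received - chunk_start)
            chunk_start = chunk_end
        return None  # all chunks already downloaded

    assert range_for_resume([100, 200, 50], 150) == 'bytes=50-'
    assert range_for_resume([100, 200, 50], 350) is None
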
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/bilibili.py new/you-get-0.4.1456/src/you_get/extractors/bilibili.py
--- old/you-get-0.4.1432/src/you_get/extractors/bilibili.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/bilibili.py	2020-07-19 16:17:28.000000000 +0200
@@ -10,6 +10,8 @@
 
     # Bilibili media encoding options, in descending quality order.
     stream_types = [
+        {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
         {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
          'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P60'},
         {'id': 'hdflv2', 'quality': 112, 'audio_quality': 30280,
@@ -42,8 +44,10 @@
             return 64
         elif height <= 1080 and qn <= 80:
             return 80
-        else:
+        elif height <= 1080 and qn <= 112:
             return 112
+        else:
+            return 120
 
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):
@@ -77,8 +81,8 @@
         return 'https://www.bilibili.com/audio/music-service-c/web/song/of-menu?sid=%s&pn=1&ps=%s' % (sid, ps)
 
     @staticmethod
-    def bilibili_bangumi_api(avid, cid, ep_id, qn=0):
-        return 'https://api.bilibili.com/pgc/player/web/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&ep_id=%s&fnver=0&fnval=16' % (avid, cid, qn, ep_id)
+    def bilibili_bangumi_api(avid, cid, ep_id, qn=0, fnval=16):
+        return 'https://api.bilibili.com/pgc/player/web/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&ep_id=%s&fnver=0&fnval=%s' % (avid, cid, qn, ep_id, fnval)
 
     @staticmethod
     def bilibili_interface_api(cid, qn=0):
@@ -213,7 +217,7 @@
         if playinfo_ is not None:
             playinfos.append(playinfo_)
         # get alternative formats from API
-        for qn in [112, 80, 64, 32, 16]:
+        for qn in [120, 112, 80, 64, 32, 16]:
             # automatic format for durl: qn=0
             # for dash, qn does not matter
             if current_quality is None or qn < current_quality:
@@ -312,15 +316,16 @@
                 return
             current_quality = api_playinfo['result']['quality']
         # get alternative formats from API
-        for qn in [112, 80, 64, 32, 16]:
-            # automatic format for durl: qn=0
-            # for dash, qn does not matter
-            if qn != current_quality:
-                api_url = self.bilibili_bangumi_api(avid, cid, ep_id, qn=qn)
-                api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
-                api_playinfo = json.loads(api_content)
-                if api_playinfo['code'] == 0:  # success
-                    playinfos.append(api_playinfo)
+        for fnval in [8, 16]:
+            for qn in [120, 112, 80, 64, 32, 16]:
+                # automatic format for durl: qn=0
+                # for dash, qn does not matter
+                if qn != current_quality:
+                    api_url = self.bilibili_bangumi_api(avid, cid, ep_id, qn=qn, fnval=fnval)
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+                    api_playinfo = json.loads(api_content)
+                    if api_playinfo['code'] == 0:  # success
+                        playinfos.append(api_playinfo)
 
         for playinfo in playinfos:
             if 'durl' in playinfo['result']:
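
The bilibili changes add the 4K tier (qn=120, id 'hdflv2_4k') and probe the bangumi API with two fnval request formats instead of one, collecting every playinfo that succeeds. A self-contained sketch of the new probing loop, with fetch_playinfo standing in for you-get's get_content-plus-json.loads round trip:

    def collect_playinfos(avid, cid, ep_id, current_quality, fetch_playinfo):
        playinfos = []
        for fnval in [8, 16]:                      # the two request formats probed by the patch
            for qn in [120, 112, 80, 64, 32, 16]:  # descending ladder; 120 is the new 4K tier
                if qn == current_quality:
                    continue                       # the initial request already covered it
                playinfo = fetch_playinfo(avid, cid, ep_id, qn, fnval)
                if playinfo.get('code') == 0:      # code 0 means success
                    playinfos.append(playinfo)
        return playinfos
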
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/ckplayer.py new/you-get-0.4.1456/src/you_get/extractors/ckplayer.py
--- old/you-get-0.4.1432/src/you_get/extractors/ckplayer.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/ckplayer.py	2020-07-19 16:17:28.000000000 +0200
@@ -6,7 +6,7 @@
 
 __all__ = ['ckplayer_download']
 
-from xml.etree import cElementTree as ET
+from xml.etree import ElementTree as ET
 from copy import copy
 from ..common import *
 #----------------------------------------------------------------------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/cntv.py new/you-get-0.4.1456/src/you_get/extractors/cntv.py
--- old/you-get-0.4.1432/src/you_get/extractors/cntv.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/cntv.py	2020-07-19 16:17:28.000000000 +0200
@@ -44,12 +44,12 @@
 
 def cntv_download(url, **kwargs):
     if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
         rid = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
-    elif re.match(r'http://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url):
+    elif re.match(r'http(s)?://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url):
         rid = r1(r'var guid = "(\w+)"', get_content(url))
     elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
          re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
          re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
-         re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
+         re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
         re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
        page = get_content(url)
        rid = r1(r'videoCenterId","(\w+)"', page)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/soundcloud.py new/you-get-0.4.1456/src/you_get/extractors/soundcloud.py
--- old/you-get-0.4.1432/src/you_get/extractors/soundcloud.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/soundcloud.py	2020-07-19 16:17:28.000000000 +0200
@@ -1,44 +1,80 @@
 #!/usr/bin/env python
 
-__all__ = ['soundcloud_download', 'soundcloud_download_by_id']
+__all__ = ['sndcd_download']
 
 from ..common import *
+import re
 import json
 import urllib.error
 
-client_id = 'WKcQQdEZw7Oi01KqtHWxeVSxNyRzgT8M'
-
-def soundcloud_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
-    assert title
-    url = 'https://api.soundcloud.com/tracks/{}/{}?client_id={}'.format(id, 'stream', client_id)
-
-    type, ext, size = url_info(url)
-
-    print_info(site_info, title, type, size)
-
-    if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
-
-def soundcloud_i1_api(track_id):
-    url = 'https://api.soundcloud.com/i1/tracks/{}/streams?client_id={}'.format(track_id, client_id)
-    return json.loads(get_content(url))['http_mp3_128_url']
-
-def soundcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    url = 'https://api.soundcloud.com/resolve.json?url={}&client_id={}'.format(url, client_id)
-    metadata = get_content(url)
-    info = json.loads(metadata)
-    title = info["title"]
-    real_url = info.get('download_url')
-    if real_url is None:
-        real_url = info.get('steram_url')
-        if real_url is None:
-            raise Exception('Cannot get media URI for {}'.format(url))
-    real_url = soundcloud_i1_api(info['id'])
-    mime, ext, size = url_info(real_url)
-    print_info(site_info, title, mime, size)
-    if not info_only:
-        download_urls([real_url], title, ext, size, output_dir, merge=merge)
+
+def get_sndcd_apikey():
+    home_page = get_content('https://soundcloud.com')
+    js_url = re.findall(r'script crossorigin src="(.+?)"></script>', home_page)[-1]
+
+    client_id = get_content(js_url)
+    return re.search(r'client_id:"(.+?)"', client_id).group(1)
+
+
+def get_resource_info(resource_url, client_id):
+    cont = get_content(resource_url, decoded=True)
+
+    x = re.escape('forEach(function(e){n(e)})}catch(t){}})},')
+    x = re.search(r'' + x + r'(.*)\);</script>', cont)
+
+    info = json.loads(x.group(1))[-1]['data'][0]
+
+    info = info['tracks'] if info.get('track_count') else [info]
+
+    ids = [i['id'] for i in info if i.get('comment_count') is None]
+    ids = list(map(str, ids))
+    ids_split = ['%2C'.join(ids[i:i+10]) for i in range(0, len(ids), 10)]
+
+    api_url = 'https://api-v2.soundcloud.com/tracks?ids={ids}&client_id={client_id}&%5Bobject%20Object%5D=&app_version=1584348206&app_locale=en'
+
+    res = []
+    for ids in ids_split:
+        uri = api_url.format(ids=ids, client_id=client_id)
+        cont = get_content(uri, decoded=True)
+        res += json.loads(cont)
+
+    res = iter(res)
+    info = [next(res) if i.get('comment_count') is None else i for i in info]
+
+    return info
+
+
+def sndcd_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    client_id = get_sndcd_apikey()
+
+    r_info = get_resource_info(url, client_id)
+
+    for info in r_info:
+        title = info['title']
+        metadata = info.get('publisher_metadata')
+
+        transcodings = info['media']['transcodings']
+        sq = [i for i in transcodings if i['quality'] == 'sq']
+        hq = [i for i in transcodings if i['quality'] == 'hq']
+        # source url
+        surl = sq[0] if hq == [] else hq[0]
+        surl = surl['url']
+
+        uri = surl + '?client_id=' + client_id
+        r = get_content(uri)
+        surl = json.loads(r)['url']
+
+        m3u8 = get_content(surl)
+        # url list
+        urll = re.findall(r'http.*?(?=\n)', m3u8)
+
+        size = urls_size(urll)
+        print_info(site_info, title, 'audio/mpeg', size)
+        print(end='', flush=True)
+
+        if not info_only:
+            download_urls(urll, title=title, ext='mp3', total_size=size, output_dir=output_dir, merge=True)
+
 
 site_info = "SoundCloud.com"
-download = soundcloud_download
-download_playlist = playlist_not_supported('soundcloud')
+download = sndcd_download
+download_playlist = sndcd_download
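
The SoundCloud extractor is rewritten from scratch: instead of shipping a hard-coded client_id (which SoundCloud rotates), it scrapes a fresh one out of the site's JavaScript bundle, resolves each track's transcodings via api-v2, and downloads the MP3 segments listed in the HLS m3u8 — segments that the new ffmpeg_concat_mp3_to_mp3 helper (in the ffmpeg.py diff below) later merges. A self-contained sketch of just the client_id discovery, reusing the diff's regexes but with plain urllib in place of you-get's get_content; it will break whenever soundcloud.com rearranges its assets:

    import re
    from urllib.request import urlopen

    def scrape_soundcloud_client_id():
        home = urlopen('https://soundcloud.com').read().decode('utf-8', 'ignore')
        # the last crossorigin <script> tag points at the app bundle
        js_urls = re.findall(r'script crossorigin src="(.+?)"></script>', home)
        bundle = urlopen(js_urls[-1]).read().decode('utf-8', 'ignore')
        match = re.search(r'client_id:"(.+?)"', bundle)
        return match.group(1) if match else None
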
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/tiktok.py new/you-get-0.4.1456/src/you_get/extractors/tiktok.py
--- old/you-get-0.4.1432/src/you_get/extractors/tiktok.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/tiktok.py	2020-07-19 16:17:28.000000000 +0200
@@ -9,7 +9,7 @@
     title = r1(r'<title.*?>(.*?)</title>', html)
     video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html)
     title = '%s [%s]' % (title, video_id)
-    source = r1(r'<video .*?src="([^"]+)"', html)
+    source = r1(r'<video .*?src="([^"]+)"', html) or r1(r'"contentUrl":"([^"]+)"', html)
     mime, ext, size = url_info(source)
 
     print_info(site_info, title, mime, size)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/twitter.py new/you-get-0.4.1456/src/you_get/extractors/twitter.py
--- old/you-get-0.4.1432/src/you_get/extractors/twitter.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/twitter.py	2020-07-19 16:17:28.000000000 +0200
@@ -41,58 +41,63 @@
                   r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
     page_title = "{} [{}]".format(screen_name, item_id)
 
-    try:  # extract images
-        urls = re.findall(r'property="og:image"\s*content="([^"]+:large)"', html)
-        assert urls
-        images = []
-        for url in urls:
-            url = ':'.join(url.split(':')[:-1]) + ':orig'
-            filename = parse.unquote(url.split('/')[-1])
-            title = '.'.join(filename.split('.')[:-1])
-            ext = url.split(':')[-2].split('.')[-1]
-            size = int(get_head(url)['Content-Length'])
-            images.append({'title': title,
-                           'url': url,
-                           'ext': ext,
-                           'size': size})
-        size = sum([image['size'] for image in images])
-        print_info(site_info, page_title, images[0]['ext'], size)
-
-        if not info_only:
-            for image in images:
-                title = image['title']
-                ext = image['ext']
-                size = image['size']
-                url = image['url']
-                print_info(site_info, title, ext, size)
-                download_urls([url], title, ext, size,
-                              output_dir=output_dir)
-
-    except:  # extract video
-        #i_url = 'https://twitter.com/i/videos/' + item_id
-        #i_content = get_content(i_url)
-        #js_url = r1(r'src="([^"]+)"', i_content)
-        #js_content = get_content(js_url)
-        #authorization = r1(r'"(Bearer [^"]+)"', js_content)
-        authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
-
-        ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
-        ga_content = post_content(ga_url, headers={'authorization': authorization})
-        guest_token = json.loads(ga_content)['guest_token']
+    authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
+
+    ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
+    ga_content = post_content(ga_url, headers={'authorization': authorization})
+    guest_token = json.loads(ga_content)['guest_token']
+
+    api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
+    api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
+
+    info = json.loads(api_content)
+    if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
+        # if the tweet contains media, download them
+        media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+
+    elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
+        # if the tweet does not contain media, but it quotes a tweet
+        # and the quoted tweet contains media, download them
+        item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
 
         api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
         api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
 
         info = json.loads(api_content)
-        variants = info['globalObjects']['tweets'][item_id]['extended_entities']['media'][0]['video_info']['variants']
-        variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
-        urls = [ variants[-1]['url'] ]
-        size = urls_size(urls)
-        mime, ext = variants[-1]['content_type'], 'mp4'
-
-        print_info(site_info, page_title, mime, size)
-        if not info_only:
-            download_urls(urls, page_title, ext, size, output_dir, merge=merge)
+
+        if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
+            media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+        else:
+            # quoted tweet has no media
+            return
+
+    else:
+        # no media, no quoted tweet
+        return
+
+    for medium in media:
+        if 'video_info' in medium:
+            # FIXME: we're assuming one tweet only contains one video here
+            variants = medium['video_info']['variants']
+            variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
+            urls = [ variants[-1]['url'] ]
+            size = urls_size(urls)
+            mime, ext = variants[-1]['content_type'], 'mp4'
+
+            print_info(site_info, page_title, mime, size)
+            if not info_only:
+                download_urls(urls, page_title, ext, size, output_dir, merge=merge)
+
+        else:
+            title = item_id + '_' + medium['media_url_https'].split('.')[-2].split('/')[-1]
+            urls = [ medium['media_url_https'] + ':orig' ]
+            size = urls_size(urls)
+            ext = medium['media_url_https'].split('.')[-1]
+
+            print_info(site_info, title, ext, size)
+            if not info_only:
+                download_urls(urls, title, ext, size, output_dir, merge=merge)
+
 
 site_info = "Twitter.com"
 download = twitter_download
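
The Twitter extractor replaces the old try-images/except-video control flow with a single API path: activate a guest token using the web client's public Bearer token, fetch the conversation timeline, then walk extended_entities.media — following quoted_status_id_str when the tweet itself carries no media. A sketch of the token handshake, with plain urllib standing in for you-get's post_content/get_content (the Bearer token is abbreviated here):

    import json
    from urllib.request import Request, urlopen

    AUTH = 'Bearer AAAA...'  # the public web-client token, shortened here

    def get_guest_token():
        req = Request('https://api.twitter.com/1.1/guest/activate.json',
                      data=b'', headers={'authorization': AUTH})  # empty POST body
        return json.loads(urlopen(req).read())['guest_token']

    def fetch_conversation(item_id):
        url = ('https://api.twitter.com/2/timeline/conversation/%s.json'
               '?tweet_mode=extended' % item_id)
        req = Request(url, headers={'authorization': AUTH,
                                    'x-guest-token': get_guest_token()})
        return json.loads(urlopen(req).read())
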
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/universal.py new/you-get-0.4.1456/src/you_get/extractors/universal.py
--- old/you-get-0.4.1432/src/you_get/extractors/universal.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/universal.py	2020-07-19 16:17:28.000000000 +0200
@@ -99,6 +99,19 @@
         for rel_url in rel_urls:
             urls += [ r1(r'(.*/)', url) + rel_url ]
 
+        # site-relative path
+        rel_urls = []
+        rel_urls += re.findall(r'href="(/[^"]+\.jpe?g)"', page, re.I)
+        rel_urls += re.findall(r'href="(/[^"]+\.png)"', page, re.I)
+        rel_urls += re.findall(r'href="(/[^"]+\.gif)"', page, re.I)
+        for rel_url in rel_urls:
+            urls += [ r1(r'(https?://[^/]+)', url) + rel_url ]
+
+        # sometimes naive
+        urls += re.findall(r'data-original="(https?://[^"]+\.jpe?g)"', page, re.I)
+        urls += re.findall(r'data-original="(https?://[^"]+\.png)"', page, re.I)
+        urls += re.findall(r'data-original="(https?://[^"]+\.gif)"', page, re.I)
+
         # MPEG-DASH MPD
         mpd_urls = re.findall(r'src="(https?://[^"]+\.mpd)"', page)
         for mpd_url in mpd_urls:
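
The universal extractor previously resolved only page-relative image links; the hunk above also collects site-relative hrefs (leading "/"), which must be joined to the page's scheme and host, plus lazily loaded images referenced via data-original attributes. The resolution step as a standalone sketch:

    import re

    def absolutize(page_url, rel_url):
        # rel_url starts with '/', so only scheme://host is kept from page_url
        host = re.match(r'(https?://[^/]+)', page_url).group(1)
        return host + rel_url

    assert absolutize('https://example.com/gallery/index.html', '/img/cat.jpg') \
        == 'https://example.com/img/cat.jpg'
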
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/xinpianchang.py new/you-get-0.4.1456/src/you_get/extractors/xinpianchang.py
--- old/you-get-0.4.1432/src/you_get/extractors/xinpianchang.py	1970-01-01 01:00:00.000000000 +0100
+++ new/you-get-0.4.1456/src/you_get/extractors/xinpianchang.py	2020-07-19 16:17:28.000000000 +0200
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import re
+import json
+from ..extractor import VideoExtractor
+from ..common import get_content, playlist_not_supported
+
+
+class Xinpianchang(VideoExtractor):
+    name = 'xinpianchang'
+    stream_types = [
+        {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
+        {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
+        {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'},
+        {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'},
+        {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'},
+        {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
+    ]
+
+    def prepare(self, **kwargs):
+        # find key
+        page_content = get_content(self.url)
+        match_rule = r"vid: \"(.+?)\","
+        key = re.findall(match_rule, page_content)[0]
+
+        # get videos info
+        video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource'
+        data = json.loads(get_content(video_url))
+        self.title = data["data"]["video"]["title"]
+        video_info = data["data"]["resource"]["progressive"]
+
+        # set streams dict
+        for video in video_info:
+            url = video["https_url"]
+            size = video["filesize"]
+            profile = video["profile_code"]
+            stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
+
+            stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
+            self.streams[stype['id']] = stream_data
+
+
+download = Xinpianchang().download_by_url
+download_playlist = playlist_not_supported('xinpianchang')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/extractors/youtube.py new/you-get-0.4.1456/src/you_get/extractors/youtube.py
--- old/you-get-0.4.1432/src/you_get/extractors/youtube.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/extractors/youtube.py	2020-07-19 16:17:28.000000000 +0200
@@ -335,9 +335,9 @@
                         'mime': stream['mimeType'].split(';')[0],
                         'container': mime_to_container(stream['mimeType'].split(';')[0]),
                     }
-                    if 'cipher' in stream:
+                    if 'signatureCipher' in stream:
                         self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
-                                                               for _ in stream['cipher'].split('&')]))
+                                                               for _ in stream['signatureCipher'].split('&')]))
 
         # Prepare caption tracks
         try:
@@ -439,6 +439,7 @@
             except:
                 # VEVO
                 if not self.html5player: return
+                self.html5player = self.html5player.replace('\/', '/')  # unescape URL (for age-restricted videos)
                 self.js = get_content(self.html5player)
 
         try:
@@ -481,10 +482,10 @@
                     del stream['contentLength']
                     del stream['initRange']
                     del stream['indexRange']
-                    if 'cipher' in stream:
+                    if 'signatureCipher' in stream:
                         stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
-                                            for _ in stream['cipher'].split('&')]))
-                        del stream['cipher']
+                                            for _ in stream['signatureCipher'].split('&')]))
+                        del stream['signatureCipher']
 
         for stream in streams:  # get over speed limiting
             stream['url'] += '&ratebypass=yes'
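
YouTube renamed the player-response field that carries stream signatures from 'cipher' to 'signatureCipher'; the value is still a URL-encoded query string (keys such as 's', 'sp', 'url'), so only the key changes. A sketch of the decode step the extractor performs, under a function name of our choosing:

    from urllib import parse

    def parse_signature_cipher(signature_cipher):
        # 'sp=sig&url=https%3A%2F%2F...' -> {'sp': 'sig', 'url': 'https://...'}
        return {k: parse.unquote(v)
                for k, v in (kv.split('=', 1) for kv in signature_cipher.split('&'))}

    example = 'sp=sig&url=https%3A%2F%2Fexample.com%2Fvideoplayback'
    assert parse_signature_cipher(example)['url'] == 'https://example.com/videoplayback'
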
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/processor/ffmpeg.py new/you-get-0.4.1456/src/you_get/processor/ffmpeg.py
--- old/you-get-0.4.1432/src/you_get/processor/ffmpeg.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/processor/ffmpeg.py	2020-07-19 16:17:28.000000000 +0200
@@ -60,7 +60,7 @@
     for file in files:
         if os.path.isfile(file):
             params.extend(['-i', file])
     params.extend(['-c', 'copy'])
-    params.append(output)
+    params.extend(['--', output])
     if subprocess.call(params, stdin=STDIN):
         print('Merging without re-encode failed.\nTry again re-encoding audio... ', end="", flush=True)
         try: os.remove(output)
@@ -74,7 +74,7 @@
             params.extend(['-strict', 'experimental'])
         elif ext == 'webm':
             params.extend(['-c:a', 'opus'])
-        params.append(output)
+        params.extend(['--', output])
 
         return subprocess.call(params, stdin=STDIN)
     else:
         return 0
@@ -83,7 +83,8 @@
     for file in files:
         if os.path.isfile(file):
             params = [FFMPEG] + LOGLEVEL
-            params.extend(['-y', '-i', file, output])
+            params.extend(['-y', '-i', file])
+            params.extend(['--', output])
             subprocess.call(params, stdin=STDIN)
 
     return
@@ -93,7 +94,8 @@
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
         params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
-                  '-i', concat_list, '-c', 'copy', output]
+                  '-i', concat_list, '-c', 'copy']
+        params.extend(['--', output])
         if subprocess.call(params, stdin=STDIN) == 0:
             os.remove(output + '.txt')
             return True
@@ -114,7 +116,7 @@
     params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
     params.append(output + '.mpg')
     params += ['-vcodec', 'copy', '-acodec', 'copy']
-    params.append(output)
+    params.extend(['--', output])
 
     if subprocess.call(params, stdin=STDIN) == 0:
         for file in files:
@@ -131,7 +133,8 @@
     for file in files:
         if os.path.isfile(file):
            params[-1] += file + '|'
-    params += ['-f', 'matroska', '-c', 'copy', output]
+    params += ['-f', 'matroska', '-c', 'copy']
+    params.extend(['--', output])
 
     try:
         if subprocess.call(params, stdin=STDIN) == 0:
@@ -148,7 +151,8 @@
         concat_list = generate_concat_list(files, output)
         params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
                   '-i', concat_list, '-c', 'copy',
-                  '-bsf:a', 'aac_adtstoasc', output]
+                  '-bsf:a', 'aac_adtstoasc']
+        params.extend(['--', output])
         subprocess.check_call(params, stdin=STDIN)
         os.remove(output + '.txt')
         return True
@@ -169,9 +173,10 @@
         if os.path.isfile(f):
             params[-1] += f + '|'
     if FFMPEG == 'avconv':
-        params += ['-c', 'copy', output]
+        params += ['-c', 'copy']
     else:
-        params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+        params += ['-c', 'copy', '-absf', 'aac_adtstoasc']
+    params.extend(['--', output])
 
     if subprocess.call(params, stdin=STDIN) == 0:
         for file in files:
@@ -180,6 +185,19 @@
     else:
         raise
 
+def ffmpeg_concat_mp3_to_mp3(files, output='output.mp3'):
+    print('Merging video parts... ', end="", flush=True)
+
+    files = 'concat:' + '|'.join(files)
+
+    params = [FFMPEG] + LOGLEVEL + ['-y']
+    params += ['-i', files, '-acodec', 'copy']
+    params.extend(['--', output])
+
+    subprocess.call(params)
+
+    return True
+
 def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
     print('Merging video parts... ', end="", flush=True)
     # Use concat demuxer on FFmpeg >= 1.1
@@ -187,7 +205,8 @@
         concat_list = generate_concat_list(files, output)
         params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
                   '-i', concat_list, '-c', 'copy',
-                  '-bsf:a', 'aac_adtstoasc', output]
+                  '-bsf:a', 'aac_adtstoasc']
+        params.extend(['--', output])
         subprocess.check_call(params, stdin=STDIN)
         os.remove(output + '.txt')
         return True
@@ -208,9 +227,10 @@
         if os.path.isfile(f):
             params[-1] += f + '|'
     if FFMPEG == 'avconv':
-        params += ['-c', 'copy', output]
+        params += ['-c', 'copy']
     else:
-        params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+        params += ['-c', 'copy', '-absf', 'aac_adtstoasc']
+    params.extend(['--', output])
 
     subprocess.check_call(params, stdin=STDIN)
     for file in files:
@@ -236,7 +256,7 @@
     ffmpeg_params.append(files)  #not the same here!!!!
 
     if FFMPEG == 'avconv':  #who cares?
-        ffmpeg_params += ['-c', 'copy', output]
+        ffmpeg_params += ['-c', 'copy']
     else:
         ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']
 
@@ -246,7 +266,7 @@
             ffmpeg_params.append(k)
             ffmpeg_params.append(v)
 
-    ffmpeg_params.append(output)
+    ffmpeg_params.extend(['--', output])
 
     print(' '.join(ffmpeg_params))
@@ -274,7 +294,7 @@
         params.extend(['-c:v', 'copy'])
         params.extend(['-c:a', 'aac'])
         params.extend(['-strict', 'experimental'])
-        params.append(output+"."+ext)
+        params.extend(['--', output + "." + ext])
         return subprocess.call(params, stdin=STDIN)
     else:
         raise EnvironmentError('No ffmpeg found')
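
The recurring ffmpeg.py change inserts '--' before every output path. ffmpeg's option parser treats a bare '--' as the end of options, so an output filename derived from a scraped title that happens to start with '-' can no longer be interpreted as a flag; this hardens all the merge helpers in one sweep. A made-up illustration:

    import subprocess

    output = '-f.mp4'  # a title beginning with '-' would otherwise look like an option
    params = ['ffmpeg', '-y', '-i', 'part0.mp4', '-c', 'copy']
    params.extend(['--', output])  # '--' ends option parsing; what follows is a filename
    subprocess.call(params)
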
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/src/you_get/version.py new/you-get-0.4.1456/src/you_get/version.py
--- old/you-get-0.4.1432/src/you_get/version.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/src/you_get/version.py	2020-07-19 16:17:28.000000000 +0200
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1432'
+__version__ = '0.4.1456'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1432/tests/test.py new/you-get-0.4.1456/tests/test.py
--- old/you-get-0.4.1432/tests/test.py	2020-03-30 02:43:59.000000000 +0200
+++ new/you-get-0.4.1456/tests/test.py	2020-07-19 16:17:28.000000000 +0200
@@ -8,7 +8,8 @@
     youtube,
     missevan,
     acfun,
-    bilibili
+    bilibili,
+    soundcloud
 )
 
 
@@ -45,5 +46,16 @@
         bilibili.download(
             "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
         )
+
+    def test_soundcloud(self):
+        ## single song
+        soundcloud.download(
+            'https://soundcloud.com/keiny-pham/impure-bird', info_only=True
+        )
+        ## playlist
+        soundcloud.download(
+            'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True
+        )
+
 
 if __name__ == '__main__':
     unittest.main()