Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2020-01-01 14:59:22
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new.6675 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "you-get"

Wed Jan 1 14:59:22 2020 rev:27 rq:760186 version:0.4.1388

Changes:
--------
--- /work/SRC/openSUSE:Factory/you-get/you-get.changes	2019-09-25 08:49:14.314216724 +0200
+++ /work/SRC/openSUSE:Factory/.you-get.new.6675/you-get.changes	2020-01-01 14:59:35.413971593 +0100
@@ -1,0 +2,10 @@
+Tue Dec 31 16:39:50 UTC 2019 - Luigi Baldoni <[email protected]>
+
+- Removed groups
+
+-------------------------------------------------------------------
+Sat Dec 28 22:17:28 UTC 2019 - Luigi Baldoni <[email protected]>
+
+- Update to version 0.4.1388 (no changelog supplied)
+
+-------------------------------------------------------------------

Old:
----
  you-get-0.4.1355.tar.gz

New:
----
  you-get-0.4.1388.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ you-get.spec ++++++
--- /var/tmp/diff_new_pack.plE2Kx/_old	2020-01-01 14:59:36.921972380 +0100
+++ /var/tmp/diff_new_pack.plE2Kx/_new	2020-01-01 14:59:36.925972382 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package you-get
 #
-# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,11 +17,10 @@
 
 Name:           you-get
-Version:        0.4.1355
+Version:        0.4.1388
 Release:        0
 Summary:        Dumb downloader that scrapes the web
 License:        MIT
-Group:          Productivity/Networking/Web/Utilities
 URL:            https://you-get.org
 Source0:        https://github.com/soimort/you-get/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
 BuildRequires:  bash-completion
@@ -40,7 +39,6 @@
 
 %package bash-completion
 Summary:        Bash completion for %{name}
-Group:          System/Shells
 Requires:       bash-completion
 Supplements:    packageand(%{name}:bash)
 BuildArch:      noarch
@@ -50,7 +48,6 @@
 
 %package fish-completion
 Summary:        Fish completion for %{name}
-Group:          System/Shells
 Requires:       fish
 Supplements:    packageand(%{name}:fish)
 BuildArch:      noarch
@@ -60,7 +57,6 @@
 
 %package zsh-completion
 Summary:        Zsh Completion for %{name}
-Group:          System/Shells
 Requires:       zsh
 Supplements:    packageand(%{name}:zsh)
 BuildArch:      noarch

++++++ you-get-0.4.1355.tar.gz -> you-get-0.4.1388.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/.gitignore new/you-get-0.4.1388/.gitignore
--- old/you-get-0.4.1355/.gitignore	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/.gitignore	2019-12-28 21:35:25.000000000 +0100
@@ -83,3 +83,9 @@
 /.idea
 *.m4a
 *.DS_Store
+*.txt
+
+*.zip
+
+.vscode
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/README.md new/you-get-0.4.1388/README.md
--- old/you-get-0.4.1355/README.md	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/README.md	2019-12-28 21:35:25.000000000 +0100
@@ -421,6 +421,7 @@
 | 荔枝FM | <http://www.lizhi.fm/> | | |✓|
 | 秒拍 | <http://www.miaopai.com/> |✓| | |
 | MioMio弹幕网 | <http://www.miomio.tv/> |✓| | |
+| MissEvan<br/>猫耳FM | <http://www.missevan.com/> | | |✓|
 | 痞客邦 | <https://www.pixnet.net/> |✓| | |
 | PPTV聚力 | <http://www.pptv.com/> |✓| | |
 | 齐鲁网 | <http://v.iqilu.com/> |✓| | |
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/common.py new/you-get-0.4.1388/src/you_get/common.py
--- old/you-get-0.4.1355/src/you_get/common.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/common.py	2019-12-28 21:35:25.000000000 +0100
@@ -66,6 +66,7 @@
     'iwara'            : 'iwara',
     'joy'              : 'joy',
     'kankanews'        : 'bilibili',
+    'kakao'            : 'kakao',
     'khanacademy'      : 'khan',
     'ku6'              : 'ku6',
     'kuaishou'         : 'kuaishou',
@@ -79,6 +80,7 @@
     'metacafe'         : 'metacafe',
     'mgtv'             : 'mgtv',
     'miomio'           : 'miomio',
+    'missevan'         : 'missevan',
     'mixcloud'         : 'mixcloud',
     'mtv81'            : 'mtv81',
     'musicplayon'      : 'musicplayon',
@@ -271,15 +273,21 @@
 def launch_player(player, urls):
     import subprocess
     import shlex
+    urls = list(urls)
+    for url in urls.copy():
+        if type(url) is list:
+            urls.extend(url)
+    urls = [url for url in urls if type(url) is str]
+    assert urls
     if (sys.version_info >= (3, 3)):
         import shutil
         exefile=shlex.split(player)[0]
         if shutil.which(exefile) is not None:
-            subprocess.call(shlex.split(player) + list(urls))
+            subprocess.call(shlex.split(player) + urls)
         else:
             log.wtf('[Failed] Cannot find player "%s"' % exefile)
     else:
-        subprocess.call(shlex.split(player) + list(urls))
+        subprocess.call(shlex.split(player) + urls)
 
 
 def parse_query_param(url, param):
@@ -915,7 +923,7 @@
     if kwargs.get('part', -1) >= 0:
         result = '%s[%02d]' % (result, kwargs.get('part'))
     result = '%s.%s' % (result, merged_ext)
-    return result
+    return result.replace("'", "_")
 
 def print_user_agent(faker=False):
     urllib_default_user_agent = 'Python-urllib/%d.%d' % sys.version_info[:2]
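The launch_player() hunk above flattens one level of nested URL lists before they reach subprocess.call(), so DASH-style sources stored as [[video_url], [audio_url]] pairs no longer break the --player option; anything that is not a string is then dropped. A minimal sketch of that flattening, using hypothetical URLs:

    def flatten_urls(urls):
        # one level of flattening, as in the patched launch_player()
        urls = list(urls)
        for url in urls.copy():
            if type(url) is list:
                urls.extend(url)
        # keep only plain string URLs
        return [url for url in urls if type(url) is str]

    print(flatten_urls([['http://cdn.example/video.m4s'], ['http://cdn.example/audio.m4s']]))
    # -> ['http://cdn.example/video.m4s', 'http://cdn.example/audio.m4s']

The get_output_filename() change in the same file replaces single quotes in generated file names with underscores, which avoids quoting problems when the name is later passed through a shell.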
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/__init__.py new/you-get-0.4.1388/src/you_get/extractors/__init__.py
--- old/you-get-0.4.1355/src/you_get/extractors/__init__.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/__init__.py	2019-12-28 21:35:25.000000000 +0100
@@ -33,7 +33,10 @@
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
+from .khan import *
 from .ku6 import *
+from .kakao import *
+from .kuaishou import *
 from .kugou import *
 from .kuwo import *
 from .le import *
@@ -62,6 +65,7 @@
 from .sohu import *
 from .soundcloud import *
 from .suntv import *
+from .ted import *
 from .theplatform import *
 from .tiktok import *
 from .tucao import *
@@ -81,9 +85,6 @@
 from .yixia import *
 from .youku import *
 from .youtube import *
-from .ted import *
-from .khan import *
 from .zhanqi import *
-from .kuaishou import *
 from .zhibo import *
 from .zhihu import *
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/acfun.py new/you-get-0.4.1388/src/you_get/extractors/acfun.py
--- old/you-get-0.4.1355/src/you_get/extractors/acfun.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/acfun.py	2019-12-28 21:35:25.000000000 +0100
@@ -121,9 +121,17 @@
         video_list = json_data.get('videoList')
         if len(video_list) > 1:
             title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
-
-        m3u8_url = json_data.get('currentVideoInfo')['playInfos'][0]['playUrls'][0]
-
+        currentVideoInfo = json_data.get('currentVideoInfo')
+        if 'playInfos' in currentVideoInfo:
+            m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
+        elif 'ksPlayJson' in currentVideoInfo:
+            ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
+            representation = ksPlayJson.get('adaptationSet').get('representation')
+            reps = []
+            for one in representation:
+                reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
+            m3u8_url = max(reps)[1]
+
     elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
         html = get_content(url, headers=fake_headers)
         tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')
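The acfun.py hunk handles a second API shape: when currentVideoInfo carries a ksPlayJson payload instead of playInfos, the extractor decodes it and keeps the representation with the largest pixel area, relying on lexicographic tuple comparison in max(). A sketch of that selection over a hypothetical payload:

    import json

    # hypothetical ksPlayJson payload, reduced to the fields the hunk reads
    ks_play_json = json.dumps({'adaptationSet': {'representation': [
        {'width': 1280, 'height': 720, 'url': 'http://cdn.example/720.m3u8', 'backupUrl': ''},
        {'width': 1920, 'height': 1080, 'url': 'http://cdn.example/1080.m3u8', 'backupUrl': ''},
    ]}})

    representation = json.loads(ks_play_json)['adaptationSet']['representation']
    # tuples sort by width*height first, so max() picks the highest resolution
    reps = [(one['width'] * one['height'], one['url'], one['backupUrl'])
            for one in representation]
    print(max(reps)[1])  # -> http://cdn.example/1080.m3u8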
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/baidu.py new/you-get-0.4.1388/src/you_get/extractors/baidu.py
--- old/you-get-0.4.1355/src/you_get/extractors/baidu.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/baidu.py	2019-12-28 21:35:25.000000000 +0100
@@ -140,8 +140,8 @@
                 output_dir=output_dir, merge=False)
 
     items = re.findall(
-        r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
-    urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
+        r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html)
+    urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i
             for i in set(items)]
 
     # handle albums
@@ -151,7 +151,7 @@
         album_info = json.loads(get_content(album_url))
         for i in album_info['data']['pic_list']:
             urls.append(
-                'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+                'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
 
     ext = 'jpg'
     size = float('Inf')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/bilibili.py new/you-get-0.4.1388/src/you_get/extractors/bilibili.py
--- old/you-get-0.4.1355/src/you_get/extractors/bilibili.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/bilibili.py	2019-12-28 21:35:25.000000000 +0100
@@ -31,15 +31,17 @@
     ]
 
     @staticmethod
-    def height_to_quality(height):
-        if height <= 360:
+    def height_to_quality(height, qn):
+        if height <= 360 and qn <= 16:
             return 16
-        elif height <= 480:
+        elif height <= 480 and qn <= 32:
             return 32
-        elif height <= 720:
+        elif height <= 720 and qn <= 64:
             return 64
-        else:
+        elif height <= 1080 and qn <= 80:
             return 80
+        else:
+            return 112
 
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):
@@ -123,7 +125,7 @@
         self.stream_qualities = {s['quality']: s for s in self.stream_types}
 
         try:
-            html_content = get_content(self.url, headers=self.bilibili_headers())
+            html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
         except:
             html_content = ''  # live always returns 400 (why?)
         #self.title = match1(html_content,
@@ -144,7 +146,7 @@
             initial_state = json.loads(initial_state_text)
             ep_id = initial_state['epList'][0]['id']
             self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
-            html_content = get_content(self.url, headers=self.bilibili_headers())
+            html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
 
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
@@ -203,12 +205,12 @@
             if playinfo_ is not None:
                 playinfos.append(playinfo_)
             # get alternative formats from API
-            for qn in [80, 64, 32, 16]:
+            for qn in [112, 80, 64, 32, 16]:
                 # automatic format for durl: qn=0
                 # for dash, qn does not matter
                 if current_quality is None or qn < current_quality:
                     api_url = self.bilibili_api(avid, cid, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo = json.loads(api_content)
                     if api_playinfo['code'] == 0:  # success
                         playinfos.append(api_playinfo)
@@ -216,7 +218,7 @@
                         message = api_playinfo['data']['message']
                 if best_quality is None or qn <= best_quality:
                     api_url = self.bilibili_interface_api(cid, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo_data = json.loads(api_content)
                     if api_playinfo_data.get('quality'):
                         playinfos.append({'code': 0, 'message': '0', 'ttl': 1, 'data': api_playinfo_data})
@@ -255,17 +257,21 @@
                     size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
 
                     # find matching audio track
-                    audio_baseurl = playinfo['data']['dash']['audio'][0]['baseUrl']
-                    for audio in playinfo['data']['dash']['audio']:
-                        if int(audio['id']) == audio_quality:
-                            audio_baseurl = audio['baseUrl']
-                            break
-                    if not audio_size_cache.get(audio_quality, False):
-                        audio_size_cache[audio_quality] = self.url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url))
-                    size += audio_size_cache[audio_quality]
-
-                    self.dash_streams[format_id] = {'container': container, 'quality': desc,
-                                                    'src': [[baseurl], [audio_baseurl]], 'size': size}
+                    if playinfo['data']['dash']['audio']:
+                        audio_baseurl = playinfo['data']['dash']['audio'][0]['baseUrl']
+                        for audio in playinfo['data']['dash']['audio']:
+                            if int(audio['id']) == audio_quality:
+                                audio_baseurl = audio['baseUrl']
+                                break
+                        if not audio_size_cache.get(audio_quality, False):
+                            audio_size_cache[audio_quality] = self.url_size(audio_baseurl, headers=self.bilibili_headers(referer=self.url))
+                        size += audio_size_cache[audio_quality]
+
+                        self.dash_streams[format_id] = {'container': container, 'quality': desc,
+                                                        'src': [[baseurl], [audio_baseurl]], 'size': size}
+                    else:
+                        self.dash_streams[format_id] = {'container': container, 'quality': desc,
+                                                        'src': [[baseurl]], 'size': size}
 
             # get danmaku
             self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
@@ -289,7 +295,7 @@
             cid = initial_state['epInfo']['cid']
             playinfos = []
             api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
-            api_content = get_content(api_url, headers=self.bilibili_headers())
+            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
             api_playinfo = json.loads(api_content)
             if api_playinfo['code'] == 0:  # success
                 playinfos.append(api_playinfo)
@@ -298,12 +304,12 @@
                 return
             current_quality = api_playinfo['result']['quality']
             # get alternative formats from API
-            for qn in [80, 64, 32, 16]:
+            for qn in [112, 80, 64, 32, 16]:
                 # automatic format for durl: qn=0
                 # for dash, qn does not matter
                 if qn != current_quality:
                     api_url = self.bilibili_bangumi_api(avid, cid, ep_id, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo = json.loads(api_content)
                     if api_playinfo['code'] == 0:  # success
                         playinfos.append(api_playinfo)
@@ -325,7 +331,7 @@
             if 'dash' in playinfo['result']:
                 for video in playinfo['result']['dash']['video']:
                     # playinfo['result']['quality'] does not reflect the correct quality of DASH stream
-                    quality = self.height_to_quality(video['height'])  # convert height to quality code
+                    quality = self.height_to_quality(video['height'], video['id'])  # convert height to quality code
                     s = self.stream_qualities[quality]
                     format_id = 'dash-' + s['id']  # prefix
                     container = 'mp4'  # enforce MP4 container
@@ -420,6 +426,98 @@
                 self.streams['mp4'] = {'container': container, 'size': size,
                                        'src': [playurl]}
 
+    def prepare_by_cid(self,avid,cid,title,html_content,playinfo,playinfo_,url):
+        #response for interaction video
+        #主要针对互动视频,使用cid而不是url来相互区分
+
+        self.stream_qualities = {s['quality']: s for s in self.stream_types}
+        self.title = title
+        self.url = url
+
+        current_quality, best_quality = None, None
+        if playinfo is not None:
+            current_quality = playinfo['data']['quality'] or None  # 0 indicates an error, fallback to None
+            if 'accept_quality' in playinfo['data'] and playinfo['data']['accept_quality'] != []:
+                best_quality = playinfo['data']['accept_quality'][0]
+        playinfos = []
+        if playinfo is not None:
+            playinfos.append(playinfo)
+        if playinfo_ is not None:
+            playinfos.append(playinfo_)
+        # get alternative formats from API
+        for qn in [80, 64, 32, 16]:
+            # automatic format for durl: qn=0
+            # for dash, qn does not matter
+            if current_quality is None or qn < current_quality:
+                api_url = self.bilibili_api(avid, cid, qn=qn)
+                api_content = get_content(api_url, headers=self.bilibili_headers())
+                api_playinfo = json.loads(api_content)
+                if api_playinfo['code'] == 0:  # success
+                    playinfos.append(api_playinfo)
+                else:
+                    message = api_playinfo['data']['message']
+            if best_quality is None or qn <= best_quality:
+                api_url = self.bilibili_interface_api(cid, qn=qn)
+                api_content = get_content(api_url, headers=self.bilibili_headers())
+                api_playinfo_data = json.loads(api_content)
+                if api_playinfo_data.get('quality'):
+                    playinfos.append({'code': 0, 'message': '0', 'ttl': 1, 'data': api_playinfo_data})
+        if not playinfos:
+            log.w(message)
+            # use bilibili error video instead
+            url = 'https://static.hdslb.com/error.mp4'
+            _, container, size = url_info(url)
+            self.streams['flv480'] = {'container': container, 'size': size, 'src': [url]}
+            return
+
+        for playinfo in playinfos:
+            quality = playinfo['data']['quality']
+            format_id = self.stream_qualities[quality]['id']
+            container = self.stream_qualities[quality]['container'].lower()
+            desc = self.stream_qualities[quality]['desc']
+
+            if 'durl' in playinfo['data']:
+                src, size = [], 0
+                for durl in playinfo['data']['durl']:
+                    src.append(durl['url'])
+                    size += durl['size']
+                self.streams[format_id] = {'container': container, 'quality': desc, 'size': size, 'src': src}
+
+            # DASH formats
+            if 'dash' in playinfo['data']:
+                audio_size_cache = {}
+                for video in playinfo['data']['dash']['video']:
+                    # prefer the latter codecs!
+                    s = self.stream_qualities[video['id']]
+                    format_id = 'dash-' + s['id']  # prefix
+                    container = 'mp4'  # enforce MP4 container
+                    desc = s['desc']
+                    audio_quality = s['audio_quality']
+                    baseurl = video['baseUrl']
+                    size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
+
+                    # find matching audio track
+                    if playinfo['data']['dash']['audio']:
+                        audio_baseurl = playinfo['data']['dash']['audio'][0]['baseUrl']
+                        for audio in playinfo['data']['dash']['audio']:
+                            if int(audio['id']) == audio_quality:
+                                audio_baseurl = audio['baseUrl']
+                                break
+                        if not audio_size_cache.get(audio_quality, False):
+                            audio_size_cache[audio_quality] = self.url_size(audio_baseurl,
+                                                                            headers=self.bilibili_headers(referer=self.url))
+                        size += audio_size_cache[audio_quality]
+
+                        self.dash_streams[format_id] = {'container': container, 'quality': desc,
+                                                        'src': [[baseurl], [audio_baseurl]], 'size': size}
+                    else:
+                        self.dash_streams[format_id] = {'container': container, 'quality': desc,
+                                                        'src': [[baseurl]], 'size': size}
+
+        # get danmaku
+        self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
+
     def extract(self, **kwargs):
         # set UA and referer for downloading
         headers = self.bilibili_headers(referer=self.url)
@@ -474,9 +572,66 @@
             initial_state = json.loads(initial_state_text)
             aid = initial_state['videoData']['aid']
             pn = initial_state['videoData']['videos']
-            for pi in range(1, pn + 1):
-                purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
-                self.__class__().download_by_url(purl, **kwargs)
+            if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频
+                search_node_list = []
+                download_cid_set = set([initial_state['videoData']['cid']])
+                params = {
+                    'id': 'cid:{}'.format(initial_state['videoData']['cid']),
+                    'aid': str(aid)
+                }
+                urlcontent = get_content('https://api.bilibili.com/x/player.so?'+parse.urlencode(params), headers=self.bilibili_headers(referer='https://www.bilibili.com/video/av{}'.format(aid)))
+                graph_version = json.loads(urlcontent[urlcontent.find('<interaction>')+13:urlcontent.find('</interaction>')])['graph_version']
+                params = {
+                    'aid': str(aid),
+                    'graph_version': graph_version,
+                    'platform': 'pc',
+                    'portal': 0,
+                    'screen': 0,
+                }
+                node_info = json.loads(get_content('https://api.bilibili.com/x/stein/nodeinfo?'+parse.urlencode(params)))
+
+                playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>')  # FIXME
+                playinfo = json.loads(playinfo_text) if playinfo_text else None
+
+                html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
+                playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>')  # FIXME
+                playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
+
+                self.prepare_by_cid(aid, initial_state['videoData']['cid'], initial_state['videoData']['title'] + ('P{}. {}'.format(1, node_info['data']['title'])),html_content,playinfo,playinfo_,url)
+                self.extract(**kwargs)
+                self.download(**kwargs)
+                for choice in node_info['data']['edges']['choices']:
+                    search_node_list.append(choice['node_id'])
+                    if not choice['cid'] in download_cid_set:
+                        download_cid_set.add(choice['cid'])
+                        self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
+                        self.extract(**kwargs)
+                        self.download(**kwargs)
+                while len(search_node_list)>0:
+                    node_id = search_node_list.pop(0)
+                    params.update({'node_id':node_id})
+                    node_info = json.loads(get_content('https://api.bilibili.com/x/stein/nodeinfo?'+parse.urlencode(params)))
+                    if node_info['data'].__contains__('edges'):
+                        for choice in node_info['data']['edges']['choices']:
+                            search_node_list.append(choice['node_id'])
+                            if not choice['cid'] in download_cid_set:
+                                download_cid_set.add(choice['cid'])
+                                self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
+                                self.extract(**kwargs)
+                                self.download(**kwargs)
+            else:
+                playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>')  # FIXME
+                playinfo = json.loads(playinfo_text) if playinfo_text else None
+
+                html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
+                playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>')  # FIXME
+                playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
+                for pi in range(pn):
+                    self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
+                    self.extract(**kwargs)
+                    self.download(**kwargs)
+                # purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
+                # self.__class__().download_by_url(purl, **kwargs)
 
         elif sort == 'bangumi':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
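The new prepare_by_cid() path teaches the bilibili extractor to handle interactive videos (互动视频, "use cid instead of url to tell parts apart", per the translated comment): it resolves graph_version from api.bilibili.com/x/player.so, then walks the choice graph returned by /x/stein/nodeinfo breadth-first, downloading each previously unseen cid exactly once. A compact sketch of that traversal, with fetch_node() standing in for the nodeinfo request and a seen-node guard added that the patch itself does not have:

    from collections import deque

    def walk_choices(root_id, root_cid, fetch_node):
        # fetch_node(node_id) stands in for the /x/stein/nodeinfo call
        seen_cids, seen_nodes = {root_cid}, {root_id}
        queue = deque([root_id])
        while queue:
            node = fetch_node(queue.popleft())
            for choice in node.get('edges', {}).get('choices', []):
                if choice['node_id'] not in seen_nodes:
                    seen_nodes.add(choice['node_id'])
                    queue.append(choice['node_id'])
                if choice['cid'] not in seen_cids:
                    seen_cids.add(choice['cid'])
                    yield choice['cid'], choice['option']  # one download per new cid

The 112 entry prepended to the qn list, together with the extra qn argument to height_to_quality(), lets the same code recognize the 1080p+ quality tier.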
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/kakao.py new/you-get-0.4.1388/src/you_get/extractors/kakao.py
--- old/you-get-0.4.1355/src/you_get/extractors/kakao.py	1970-01-01 01:00:00.000000000 +0100
+++ new/you-get-0.4.1388/src/you_get/extractors/kakao.py	2019-12-28 21:35:25.000000000 +0100
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+from ..common import *
+from .universal import *
+
+__all__ = ['kakao_download']
+
+
+def kakao_download(url, output_dir='.', info_only=False, **kwargs):
+    json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}'
+
+    # in this implementation playlist not supported so use url_without_playlist
+    # if want to support playlist need to change that
+    if re.search('playlistId', url):
+        url = re.search(r"(.+)\?.+?", url).group(1)
+
+    page = get_content(url)
+    try:
+        vid = re.search(r"<meta name=\"vid\" content=\"(.+)\">", page).group(1)
+        title = re.search(r"<meta name=\"title\" content=\"(.+)\">", page).group(1)
+
+        meta_str = get_content(json_request_url.format(vid))
+        meta_json = json.loads(meta_str)
+
+        standard_preset = meta_json['output_list']['standard_preset']
+        output_videos = meta_json['output_list']['output_list']
+        size = ''
+        if meta_json['svcname'] == 'smr_pip':
+            for v in output_videos:
+                if v['preset'] == 'mp4_PIP_SMR_480P':
+                    size = int(v['filesize'])
+                    break
+        else:
+            for v in output_videos:
+                if v['preset'] == standard_preset:
+                    size = int(v['filesize'])
+                    break
+
+        video_url = meta_json['location']['url']
+
+        print_info(site_info, title, 'mp4', size)
+        if not info_only:
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
+    except:
+        universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)
+
+
+site_info = "tv.kakao.com"
+download = kakao_download
+download_playlist = playlist_not_supported('kakao')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/missevan.py new/you-get-0.4.1388/src/you_get/extractors/missevan.py
--- old/you-get-0.4.1355/src/you_get/extractors/missevan.py	1970-01-01 01:00:00.000000000 +0100
+++ new/you-get-0.4.1388/src/you_get/extractors/missevan.py	2019-12-28 21:35:25.000000000 +0100
@@ -0,0 +1,361 @@
+"""
+MIT License
+
+Copyright (c) 2019 WaferJay
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import json
+import os
+import re
+
+from ..common import get_content, urls_size, log, player, dry_run
+from ..extractor import VideoExtractor
+
+_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \
+      '(KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+
+
+class _NoMatchException(Exception):
+    pass
+
+
+class _Dispatcher(object):
+
+    def __init__(self):
+        self.entry = []
+
+    def register(self, patterns, fun):
+        if not isinstance(patterns, (list, tuple)):
+            patterns = [patterns]
+
+        patterns = [re.compile(reg) for reg in patterns]
+        self.entry.append((patterns, fun))
+
+    def endpoint(self, *patterns):
+        assert patterns, 'patterns must not be empty'
+        def _wrap(fun):
+            self.register(patterns, fun)
+            return fun
+        return _wrap
+
+    def test(self, url):
+        return any(pa.search(url) for pas, _ in self.entry for pa in pas)
+
+    def dispatch(self, url, *args, **kwargs):
+
+        for patterns, fun in self.entry:
+
+            for pa in patterns:
+
+                match = pa.search(url)
+                if not match:
+                    continue
+
+                kwargs.update(match.groupdict())
+                return fun(*args, **kwargs)
+
+        raise _NoMatchException()
+
+missevan_stream_types = [
+    {'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl',
+     'resource_url_fmt': 'sound/{resource_url}'},
+    {'id': '320', 'quality': '320 Kbps', 'url_json_key': 'soundurl_64'},
+    {'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'},
+    {'id': '32', 'quality': '32 Kbps', 'url_json_key': 'soundurl_32'},
+    {'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image',
+     'default_src': 'covers/nocover.png',
+     'resource_url_fmt': 'covers/{resource_url}'},
+    {'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'cover_image',
+     'default_src': 'coversmini/nocover.png',
+     'resource_url_fmt': 'coversmini/{resource_url}'}
+]
+
+def _get_resource_uri(data, stream_type):
+    uri = data[stream_type['url_json_key']]
+    if not uri:
+        return stream_type.get('default_src')
+
+    uri_fmt = stream_type.get('resource_url_fmt')
+    if not uri_fmt:
+        return uri
+    return uri_fmt.format(resource_url=uri)
+
+def is_covers_stream(stream):
+    stream = stream or ''
+    return stream.lower() in ('covers', 'coversmini')
+
+def get_file_extension(file_path, default=''):
+    _, suffix = os.path.splitext(file_path)
+    if suffix:
+        # remove dot
+        suffix = suffix[1:]
+    return suffix or default
+
+def best_quality_stream_id(streams, stream_types):
+    for stream_type in stream_types:
+        if streams.get(stream_type['id']):
+            return stream_type['id']
+
+    raise AssertionError('no stream selected')
+
+
+class MissEvanWithStream(VideoExtractor):
+
+    name = 'MissEvan'
+    stream_types = missevan_stream_types
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.referer = 'https://www.missevan.com/'
+        self.ua = _UA
+
+    @classmethod
+    def create(cls, title, streams, *, streams_sorted=None):
+        obj = cls()
+        obj.title = title
+        obj.streams.update(streams)
+        streams_sorted = streams_sorted or cls._setup_streams_sorted(streams)
+        obj.streams_sorted.extend(streams_sorted)
+        return obj
+
+    def set_danmaku(self, danmaku):
+        self.danmaku = danmaku
+        return self
+
+    @staticmethod
+    def _setup_streams_sorted(streams):
+        streams_sorted = []
+        for key, stream in streams.items():
+            copy_stream = stream.copy()
+            copy_stream['id'] = key
+            streams_sorted.append(copy_stream)
+
+        return streams_sorted
+
+    def download(self, **kwargs):
+        stream_id = kwargs.get('stream_id') or self.stream_types[0]['id']
+        stream = self.streams[stream_id]
+        if 'size' not in stream:
+            stream['size'] = urls_size(stream['src'])
+
+        super().download(**kwargs)
+
+    def unsupported_method(self, *args, **kwargs):
+        raise AssertionError('Unsupported')
+
+    download_by_url = unsupported_method
+    download_by_vid = unsupported_method
+    prepare = unsupported_method
+    extract = unsupported_method
+
+
+class MissEvan(VideoExtractor):
+
+    name = 'MissEvan'
+    stream_types = missevan_stream_types
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.referer = 'https://www.missevan.com/'
+        self.ua = _UA
+        self.__headers = {'User-Agent': self.ua, 'Referer': self.referer}
+
+    __prepare_dispatcher = _Dispatcher()
+
+    @__prepare_dispatcher.endpoint(
+        re.compile(r'missevan\.com/sound/(?:player\?.*?id=)?(?P<sid>\d+)', re.I))
+    def prepare_sound(self, sid, **kwargs):
+        json_data = self._get_json(self.url_sound_api(sid))
+        sound = json_data['info']['sound']
+
+        self.title = sound['soundstr']
+        if sound.get('need_pay'):
+            log.e('付费资源无法下载')
+            return
+
+        if not is_covers_stream(kwargs.get('stream_id')) and not dry_run:
+            self.danmaku = self._get_content(self.url_danmaku_api(sid))
+
+        self.streams = self.setup_streams(sound)
+
+    @classmethod
+    def setup_streams(cls, sound):
+        streams = {}
+
+        for stream_type in cls.stream_types:
+            uri = _get_resource_uri(sound, stream_type)
+            resource_url = cls.url_resource(uri) if uri else None
+
+            if resource_url:
+                container = get_file_extension(resource_url)
+                stream_id = stream_type['id']
+                streams[stream_id] = {'src': [resource_url], 'container': container}
+                quality = stream_type.get('quality')
+                if quality:
+                    streams[stream_id]['quality'] = quality
+        return streams
+
+    def prepare(self, **kwargs):
+        if self.vid:
+            self.prepare_sound(self.vid, **kwargs)
+            return
+
+        try:
+            self.__prepare_dispatcher.dispatch(self.url, self, **kwargs)
+        except _NoMatchException:
+            log.e('[Error] Unsupported URL pattern.')
+            exit(1)
+
+    @staticmethod
+    def download_covers(title, streams, **kwargs):
+        if not is_covers_stream(kwargs.get('stream_id')) \
+                and not kwargs.get('json_output') \
+                and not kwargs.get('info_only') \
+                and not player:
+            kwargs['stream_id'] = 'covers'
+            MissEvanWithStream \
+                .create(title, streams) \
+                .download(**kwargs)
+
+    _download_playlist_dispatcher = _Dispatcher()
+
+    @_download_playlist_dispatcher.endpoint(
+        re.compile(r'missevan\.com/album(?:info)?/(?P<aid>\d+)', re.I))
+    def download_album(self, aid, **kwargs):
+        json_data = self._get_json(self.url_album_api(aid))
+        album = json_data['info']['album']
+        self.title = album['title']
+        sounds = json_data['info']['sounds']
+
+        output_dir = os.path.abspath(kwargs.pop('output_dir', '.'))
+        output_dir = os.path.join(output_dir, self.title)
+        kwargs['output_dir'] = output_dir
+
+        for sound in sounds:
+            sound_title = sound['soundstr']
+            if sound.get('need_pay'):
+                log.w('跳过付费资源: ' + sound_title)
+                continue
+
+            streams = self.setup_streams(sound)
+            extractor = MissEvanWithStream.create(sound_title, streams)
+            if not dry_run:
+                sound_id = sound['id']
+                danmaku = self._get_content(self.url_danmaku_api(sound_id))
+                extractor.set_danmaku(danmaku)
+            extractor.download(**kwargs)
+
+            self.download_covers(sound_title, streams, **kwargs)
+
+    @_download_playlist_dispatcher.endpoint(
+        re.compile(r'missevan\.com(?:/mdrama)?/drama/(?P<did>\d+)', re.I))
+    def download_drama(self, did, **kwargs):
+        json_data = self._get_json(self.url_drama_api(did))
+
+        drama = json_data['info']['drama']
+        if drama.get('need_pay'):
+            log.w('该剧集包含付费资源, 付费资源将被跳过')
+
+        self.title = drama['name']
+        output_dir = os.path.abspath(kwargs.pop('output_dir', '.'))
+        output_dir = os.path.join(output_dir, self.title)
+        kwargs['output_dir'] = output_dir
+
+        episodes = json_data['info']['episodes']
+        for each in episodes['episode']:
+            if each.get('need_pay'):
+                log.w('跳过付费资源: ' + each['soundstr'])
+                continue
+            sound_id = each['sound_id']
+            MissEvan().download_by_vid(sound_id, **kwargs)
+
+    def download_playlist_by_url(self, url, **kwargs):
+        self.url = url
+        try:
+            self._download_playlist_dispatcher.dispatch(url, self, **kwargs)
+        except _NoMatchException:
+            log.e('[Error] Unsupported URL pattern with --playlist option.')
+            exit(1)
+
+    def download_by_url(self, url, **kwargs):
+        if not kwargs.get('playlist') and self._download_playlist_dispatcher.test(url):
+            log.w('This is an album or drama. (use --playlist option to download all).')
+        else:
+            super().download_by_url(url, **kwargs)
+
+    def download(self, **kwargs):
+        kwargs['keep_obj'] = True  # keep the self.streams to download cover
+        super().download(**kwargs)
+        self.download_covers(self.title, self.streams, **kwargs)
+
+    def extract(self, **kwargs):
+        stream_id = kwargs.get('stream_id')
+
+        # fetch all streams size when output info or json
+        if kwargs.get('info_only') and not stream_id \
+                or kwargs.get('json_output'):
+
+            for _, stream in self.streams.items():
+                stream['size'] = urls_size(stream['src'])
+            return
+
+        # fetch size of the selected stream only
+        if not stream_id:
+            stream_id = best_quality_stream_id(self.streams, self.stream_types)
+
+        stream = self.streams[stream_id]
+        if 'size' not in stream:
+            stream['size'] = urls_size(stream['src'])
+
+    def _get_content(self, url):
+        return get_content(url, headers=self.__headers)
+
+    def _get_json(self, url):
+        content = self._get_content(url)
+        return json.loads(content)
+
+    @staticmethod
+    def url_album_api(album_id):
+        return 'https://www.missevan.com/sound' \
+               '/soundalllist?albumid=' + str(album_id)
+
+    @staticmethod
+    def url_sound_api(sound_id):
+        return 'https://www.missevan.com/sound' \
+               '/getsound?soundid=' + str(sound_id)
+
+    @staticmethod
+    def url_drama_api(drama_id):
+        return 'https://www.missevan.com/dramaapi' \
+               '/getdrama?drama_id=' + str(drama_id)
+
+    @staticmethod
+    def url_danmaku_api(sound_id):
+        return 'https://www.missevan.com/sound/getdm?soundid=' + str(sound_id)
+
+    @staticmethod
+    def url_resource(uri):
+        return 'https://static.missevan.com/' + uri
+
+site = MissEvan()
+site_info = 'MissEvan.com'
+download = site.download_by_url
+download_playlist = site.download_playlist_by_url
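Most of missevan.py hangs off the small _Dispatcher class: endpoints register regex patterns, and dispatch() invokes the first handler whose pattern matches the URL, forwarding named groups as keyword arguments. A stripped-down usage sketch of the same mechanism, with a hypothetical handler:

    import re

    class _Dispatcher:
        def __init__(self):
            self.entry = []

        def endpoint(self, *patterns):
            def _wrap(fun):
                self.entry.append(([re.compile(p) for p in patterns], fun))
                return fun
            return _wrap

        def dispatch(self, url, *args, **kwargs):
            for patterns, fun in self.entry:
                for pattern in patterns:
                    match = pattern.search(url)
                    if match:
                        kwargs.update(match.groupdict())  # named groups become kwargs
                        return fun(*args, **kwargs)
            raise LookupError(url)

    routes = _Dispatcher()

    @routes.endpoint(r'missevan\.com/sound/(?P<sid>\d+)')
    def handle_sound(sid):
        return 'sound #' + sid

    print(routes.dispatch('https://www.missevan.com/sound/1285995'))  # -> sound #1285995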
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/naver.py new/you-get-0.4.1388/src/you_get/extractors/naver.py
--- old/you-get-0.4.1355/src/you_get/extractors/naver.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/naver.py	2019-12-28 21:35:25.000000000 +0100
@@ -16,15 +16,8 @@
     ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
     page = get_content(url)
     try:
-        temp = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page)
-        if temp is not None:
-            og_video_url = temp.group(1)
-            params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
-            vid = params_dict['vid'][0]
-            key = params_dict['outKey'][0]
-        else:
-            vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
-            key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
+        vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
+        key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
         meta_str = get_content(ep.format(vid, key))
         meta_json = json.loads(meta_str)
         if 'errorCode' in meta_json:
@@ -38,7 +31,7 @@
         size = url_size(video_url)
         print_info(site_info, title, 'mp4', size)
         if not info_only:
-            download_urls([video_url], title, 'mp4', size, **kwargs)
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
     except:
         universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/qq.py new/you-get-0.4.1388/src/you_get/extractors/qq.py
--- old/you-get-0.4.1355/src/you_get/extractors/qq.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/qq.py	2019-12-28 21:35:25.000000000 +0100
@@ -128,7 +128,7 @@
 
     if 'mp.weixin.qq.com/s' in url:
         content = get_content(url)
-        vids = matchall(content, [r'\?vid=(\w+)'])
+        vids = matchall(content, [r'[?;]vid=(\w+)'])
        for vid in vids:
             qq_download_by_vid(vid, vid, output_dir, merge, info_only)
         return
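The one-character qq.py change widens the vid pattern from '\?vid=' to '[?;]vid=', so vid parameters that follow a semicolon in embedded WeChat article markup are also picked up, not only those after '?'. Checked against a hypothetical snippet:

    import re

    content = 'player.html?vid=abc123;vid=def456'
    print(re.findall(r'\?vid=(\w+)', content))    # old pattern -> ['abc123']
    print(re.findall(r'[?;]vid=(\w+)', content))  # new pattern -> ['abc123', 'def456']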
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/extractors/youtube.py new/you-get-0.4.1388/src/you_get/extractors/youtube.py
--- old/you-get-0.4.1355/src/you_get/extractors/youtube.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/extractors/youtube.py	2019-12-28 21:35:25.000000000 +0100
@@ -220,7 +220,10 @@
             stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
         except:
-            stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
+            if 'url_encoded_fmt_stream_map' not in video_info:
+                stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
+            else:
+                stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
         if re.search('([^"]*/base\.js)"', video_page):
             self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
         else:
@@ -302,19 +305,35 @@
             exit(0)
 
         for stream in stream_list:
-            metadata = parse.parse_qs(stream)
-            stream_itag = metadata['itag'][0]
-            self.streams[stream_itag] = {
-                'itag': metadata['itag'][0],
-                'url': metadata['url'][0],
-                'sig': metadata['sig'][0] if 'sig' in metadata else None,
-                's': metadata['s'][0] if 's' in metadata else None,
-                'quality': metadata['quality'][0] if 'quality' in metadata else None,
-                #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
-                'type': metadata['type'][0],
-                'mime': metadata['type'][0].split(';')[0],
-                'container': mime_to_container(metadata['type'][0].split(';')[0]),
-            }
+            if isinstance(stream, str):
+                metadata = parse.parse_qs(stream)
+                stream_itag = metadata['itag'][0]
+                self.streams[stream_itag] = {
+                    'itag': metadata['itag'][0],
+                    'url': metadata['url'][0],
+                    'sig': metadata['sig'][0] if 'sig' in metadata else None,
+                    's': metadata['s'][0] if 's' in metadata else None,
+                    'quality': metadata['quality'][0] if 'quality' in metadata else None,
+                    #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
+                    'type': metadata['type'][0],
+                    'mime': metadata['type'][0].split(';')[0],
+                    'container': mime_to_container(metadata['type'][0].split(';')[0]),
+                }
+            else:
+                stream_itag = stream['itag']
+                self.streams[stream_itag] = {
+                    'itag': str(stream['itag']),
+                    'url': stream['url'] if 'url' in stream else None,
+                    'sig': None,
+                    's': None,
+                    'quality': stream['quality'],
+                    'type': stream['mimeType'],
+                    'mime': stream['mimeType'].split(';')[0],
+                    'container': mime_to_container(stream['mimeType'].split(';')[0]),
+                }
+                if 'cipher' in stream:
+                    self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                                           for _ in stream['cipher'].split('&')]))
 
         # Prepare caption tracks
         try:
@@ -425,10 +444,38 @@
                            for i in afmt.split('&')])
                       for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
         except:
-            streams = [dict([(i.split('=')[0],
-                              parse.unquote(i.split('=')[1]))
-                             for i in afmt.split('&')])
-                       for afmt in video_info['adaptive_fmts'][0].split(',')]
+            if 'adaptive_fmts' in video_info:
+                streams = [dict([(i.split('=')[0],
+                                  parse.unquote(i.split('=')[1]))
+                                 for i in afmt.split('&')])
+                           for afmt in video_info['adaptive_fmts'][0].split(',')]
+            else:
+                streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+                for stream in streams:
+                    stream['itag'] = str(stream['itag'])
+                    if 'qualityLabel' in stream:
+                        stream['quality_label'] = stream['qualityLabel']
+                        del stream['qualityLabel']
+                    if 'width' in stream:
+                        stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+                        del stream['width']
+                        del stream['height']
+                    stream['type'] = stream['mimeType']
+                    stream['clen'] = stream['contentLength']
+                    stream['init'] = '{}-{}'.format(
+                        stream['initRange']['start'],
+                        stream['initRange']['end'])
+                    stream['index'] = '{}-{}'.format(
+                        stream['indexRange']['start'],
+                        stream['indexRange']['end'])
+                    del stream['mimeType']
+                    del stream['contentLength']
+                    del stream['initRange']
+                    del stream['indexRange']
+                    if 'cipher' in stream:
+                        stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                            for _ in stream['cipher'].split('&')]))
+                        del stream['cipher']
 
         for stream in streams:  # get over speed limiting
             stream['url'] += '&ratebypass=yes'
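The youtube.py changes add a fallback for pages where url_encoded_fmt_stream_map and adaptive_fmts are absent: stream metadata is read from the JSON player_response instead, and the querystring-shaped cipher field is unpacked into the stream dict. A small sketch of that unpacking on a hypothetical cipher value:

    from urllib import parse

    # hypothetical 'cipher' value, shaped like the ones the hunks unpack
    stream = {'itag': '22',
              'cipher': 's=ABCD&sp=sig&url=https%3A%2F%2Fcdn.example%2Fvideoplayback'}

    # the same split-and-unquote the patch applies to stream['cipher']
    stream.update(dict((kv.split('=')[0], parse.unquote(kv.split('=')[1]))
                       for kv in stream['cipher'].split('&')))
    print(stream['s'], stream['sp'], stream['url'])
    # -> ABCD sig https://cdn.example/videoplayback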
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/src/you_get/version.py new/you-get-0.4.1388/src/you_get/version.py
--- old/you-get-0.4.1355/src/you_get/version.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/src/you_get/version.py	2019-12-28 21:35:25.000000000 +0100
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1355'
+__version__ = '0.4.1388'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1355/tests/test.py new/you-get-0.4.1388/tests/test.py
--- old/you-get-0.4.1355/tests/test.py	2019-09-24 13:44:06.000000000 +0200
+++ new/you-get-0.4.1388/tests/test.py	2019-12-28 21:35:25.000000000 +0100
@@ -6,8 +6,8 @@
     imgur,
     magisto,
     youtube,
-    bilibili,
-    toutiao,
+    missevan,
+    acfun
 )
 
 
@@ -22,6 +22,13 @@
             info_only=True
         )
 
+    def test_missevan(self):
+        missevan.download('https://m.missevan.com/sound/1285995', info_only=True)
+        missevan.download_playlist(
+            'https://www.missevan.com/mdrama/drama/24130', info_only=True)
+        missevan.download_playlist(
+            'https://www.missevan.com/albuminfo/203090', info_only=True)
+
     def test_youtube(self):
         youtube.download(
             'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
@@ -32,6 +39,8 @@
             info_only=True
         )
 
+    def test_acfun(self):
+        acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
 
 if __name__ == '__main__':
     unittest.main()
