Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2018-12-03 10:10:35 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.19453 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Mon Dec 3 10:10:35 2018 rev:15 rq:653197 version:0.4.1181 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2018-11-09 07:54:17.131720342 +0100 +++ /work/SRC/openSUSE:Factory/.you-get.new.19453/you-get.changes 2018-12-03 10:10:55.543679324 +0100 @@ -1,0 +2,5 @@ +Sat Dec 1 06:54:44 UTC 2018 - [email protected] + +- Update to version 0.4.1181 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1167.tar.gz New: ---- you-get-0.4.1181.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.oKUsAh/_old 2018-12-03 10:10:56.751678207 +0100 +++ /var/tmp/diff_new_pack.oKUsAh/_new 2018-12-03 10:10:56.751678207 +0100 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1167 +Version: 0.4.1181 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1167.tar.gz -> you-get-0.4.1181.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/common.py new/you-get-0.4.1181/src/you_get/common.py --- old/you-get-0.4.1167/src/you_get/common.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/common.py 2018-11-30 21:51:11.000000000 +0100 @@ -102,6 +102,7 @@ 'soundcloud' : 'soundcloud', 'ted' : 'ted', 'theplatform' : 'theplatform', + 'tiktok' : 'tiktok', 'tucao' : 'tucao', 'tudou' : 'tudou', 'tumblr' : 'tumblr', @@ -1575,9 +1576,9 @@ url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) page = get_content(url, headers=fake_headers) videos = re.findall( - r'<a href="(https?://[^"]+)" onmousedown="[^"]+">([^<]+)<', page + r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page ) - vdurs = re.findall(r'<span class="vdur _dwc">([^<]+)<', page) + vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page) durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs] print('Google Videos search:') for v in zip(videos, durs): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractor.py new/you-get-0.4.1181/src/you_get/extractor.py --- old/you-get-0.4.1167/src/you_get/extractor.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractor.py 2018-11-30 21:51:11.000000000 +0100 @@ -211,7 +211,7 @@ ext = self.dash_streams[stream_id]['container'] total_size = self.dash_streams[stream_id]['size'] - if ext == 'm3u8': + if ext == 'm3u8' or ext == 'm4a': ext = 'mp4' if not urls: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/__init__.py new/you-get-0.4.1181/src/you_get/extractors/__init__.py --- old/you-get-0.4.1167/src/you_get/extractors/__init__.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/__init__.py 2018-11-30 21:51:11.000000000 +0100 @@ -67,6 +67,7 @@ from .soundcloud import * from .suntv import * from .theplatform import * +from .tiktok import * from .tucao import * from .tudou import * from .tumblr import * @@ -88,4 +89,4 @@ from .khan import * from .zhanqi import * from .kuaishou import * -from .zhibo import * \ No newline at end of file +from .zhibo import * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/acfun.py new/you-get-0.4.1181/src/you_get/extractors/acfun.py --- old/you-get-0.4.1167/src/you_get/extractors/acfun.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/acfun.py 2018-11-30 21:51:11.000000000 +0100 @@ -85,9 +85,13 @@ _, _, seg_size = url_info(url) size += seg_size #fallback to flvhd is not quite possible - print_info(site_info, title, 'mp4', size) + if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]): + ext = 'flv' + else: + ext = 'mp4' + print_info(site_info, title, ext, size) if not info_only: - download_urls(preferred[0], title, 'mp4', size, output_dir=output_dir, merge=merge) + download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge) else: raise NotImplementedError(sourceType) @@ -105,27 +109,42 @@ pass def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url) - html = get_content(url) + assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url) - title = r1(r'data-title="([^"]+)"', html) + if re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url): + html = get_content(url) + title = r1(r'data-title="([^"]+)"', html) + if match1(url, r'_(\d+)$'): # current P + title = title + " " + r1(r'active">([^<]*)', html) + vid = r1('data-vid="(\d+)"', html) + up = r1('data-name="([^"]+)"', html) + # bangumi + elif re.match("http://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url): + html = get_content(url) + title = match1(html, r'"newTitle"\s*:\s*"([^"]+)"') + if match1(url, r'_(\d+)$'): # current P + title = title + " " + r1(r'active">([^<]*)', html) + vid = match1(html, r'videoId="(\d+)"') + up = "acfun" + else: + raise NotImplemented + + assert title and vid title = unescape_html(title) title = escape_file_path(title) - assert title - if match1(url, r'_(\d+)$'): # current P - title = title + " " + r1(r'active">([^<]*)', html) - - vid = r1('data-vid="(\d+)"', html) - up = r1('data-name="([^"]+)"', html) p_title = r1('active">([^<]+)', html) title = '%s (%s)' % (title, up) - if p_title: title = '%s - %s' % (title, p_title) + if p_title: + title = '%s - %s' % (title, p_title) + + acfun_download_by_vid(vid, title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + site_info = "AcFun.tv" download = acfun_download download_playlist = playlist_not_supported('acfun') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/bilibili.py new/you-get-0.4.1181/src/you_get/extractors/bilibili.py --- old/you-get-0.4.1167/src/you_get/extractors/bilibili.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/bilibili.py 2018-11-30 21:51:11.000000000 +0100 @@ -130,14 +130,13 @@ m = re.search(r'<h1.*?>(.*?)</h1>', self.page) or re.search(r'<h1 title="([^"]+)">', self.page) if m is not None: self.title = m.group(1) - s = re.search(r'<span>([^<]+)</span>', m.group(1)) + s = re.search(r'<span.*?>([^<]+)</span>', m.group(1)) if s: self.title = unescape_html(s.group(1)) if self.title is None: m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: self.title = m.group(1) - if 'subtitle' in kwargs: subtitle = kwargs['subtitle'] self.title = '{} {}'.format(self.title, subtitle) @@ -162,6 +161,8 @@ self.live_entry(**kwargs) elif 'vc.bilibili.com' in self.url: self.vc_entry(**kwargs) + elif 'audio/au' in self.url: + self.audio_entry(**kwargs) else: self.entry(**kwargs) @@ -173,6 +174,30 @@ self.title = page_list[0]['pagename'] self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs) + def audio_entry(self, **kwargs): + assert re.match(r'https?://www.bilibili.com/audio/au\d+', self.url) + patt = r"(\d+)" + audio_id = re.search(patt, self.url).group(1) + audio_info_url = \ + 'https://www.bilibili.com/audio/music-service-c/web/song/info?sid={}'.format(audio_id) + audio_info_response = json.loads(get_content(audio_info_url)) + if audio_info_response['msg'] != 'success': + log.wtf('fetch audio information failed!') + sys.exit(2) + self.title = audio_info_response['data']['title'] + # TODO:there is no quality option for now + audio_download_url = \ + 'https://www.bilibili.com/audio/music-service-c/web/url?sid={}&privilege=2&quality=2'.format(audio_id) + audio_download_response = json.loads(get_content(audio_download_url)) + if audio_download_response['msg'] != 'success': + log.wtf('fetch audio resource failed!') + sys.exit(2) + self.streams['mp4'] = {} + self.streams['mp4']['src'] = [audio_download_response['data']['cdns'][0]] + self.streams['mp4']['container'] = 'm4a' + self.streams['mp4']['size'] = audio_download_response['data']['size'] + + def entry(self, **kwargs): # tencent player tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page) @@ -265,22 +290,9 @@ episode_id = frag else: episode_id = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page) or re.search(r'\/ep(\d+)', self.url).group(1) - # cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data=dict(episode_id=episode_id)) - # cid = json.loads(cont)['result']['cid'] - cont = get_content('http://bangumi.bilibili.com/web_api/episode/{}.json'.format(episode_id)) - ep_info = json.loads(cont)['result']['currentEpisode'] - - bangumi_data = get_bangumi_info(str(ep_info['seasonId'])) - bangumi_payment = bangumi_data.get('payment') - if bangumi_payment and bangumi_payment['price'] != '0': - log.w("It's a paid item") - # ep_ids = collect_bangumi_epids(bangumi_data) - - index_title = ep_info['indexTitle'] - long_title = ep_info['longTitle'].strip() - cid = ep_info['danmaku'] - - self.title = '{} [{} {}]'.format(self.title, index_title, long_title) + data = json.loads(re.search(r'__INITIAL_STATE__=(.+);\(function', self.page).group(1)) + cid = data['epInfo']['cid'] + # index_title = data['epInfo']['index_title'] self.download_by_vid(cid, bangumi=True, **kwargs) @@ -383,7 +395,79 @@ else: log.wtf("Fail to parse the fav title" + url, "") +def download_music_from_favlist(url, page, **kwargs): + m = re.search(r'https?://www.bilibili.com/audio/mycollection/(\d+)', url) + if m is not None: + sid = m.group(1) + json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-coll?" + "sid={}&pn={}&ps=100".format(sid, page))) + if json_result['msg'] == 'success': + music_list = json_result['data']['data'] + music_count = len(music_list) + for i in range(music_count): + audio_id = music_list[i]['id'] + audio_title = music_list[i]['title'] + audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id) + print("Start downloading music ", audio_title) + Bilibili().download_by_url(audio_url, **kwargs) + if page < json_result['data']['pageCount']: + page += 1 + download_music_from_favlist(url, page, **kwargs) + else: + log.wtf("Fail to get music list of page " + json_result) + sys.exit(2) + else: + log.wtf("Fail to parse the sid from " + url, "") + +def download_video_from_totallist(url, page, **kwargs): + # the url has format: https://space.bilibili.com/64169458/#/video + m = re.search(r'space\.bilibili\.com/(\d+)/.*?video', url) + mid = "" + if m is not None: + mid = m.group(1) + jsonresult = json.loads(get_content("https://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize=100&tid=0&page={}&keyword=&order=pubdate&jsonp=jsonp".format(mid, page))) + if jsonresult['status']: + videos = jsonresult['data']['vlist'] + videocount = len(videos) + for i in range(videocount): + videoid = videos[i]["aid"] + videotitle = videos[i]["title"] + videourl = "https://www.bilibili.com/video/av{}".format(videoid) + print("Start downloading ", videotitle, " video ", videotitle) + Bilibili().download_by_url(videourl, subtitle=videotitle, **kwargs) + if page < jsonresult['data']['pages']: + page += 1 + download_video_from_totallist(url, page, **kwargs) + else: + log.wtf("Fail to get the files of page " + jsonresult) + sys.exit(2) + + else: + log.wtf("Fail to parse the video title" + url, "") +def download_music_from_totallist(url, page, **kwargs): + m = re.search(r'https?://www.bilibili.com/audio/am(\d+)\?type=\d', url) + if m is not None: + sid = m.group(1) + json_result = json.loads(get_content("https://www.bilibili.com/audio/music-service-c/web/song/of-menu?" + "sid={}&pn={}&ps=100".format(sid, page))) + if json_result['msg'] == 'success': + music_list = json_result['data']['data'] + music_count = len(music_list) + for i in range(music_count): + audio_id = music_list[i]['id'] + audio_title = music_list[i]['title'] + audio_url = "https://www.bilibili.com/audio/au{}".format(audio_id) + print("Start downloading music ",audio_title) + Bilibili().download_by_url(audio_url, **kwargs) + if page < json_result['data']['pageCount']: + page += 1 + download_music_from_totallist(url, page, **kwargs) + else: + log.wtf("Fail to get music list of page " + json_result) + sys.exit(2) + else: + log.wtf("Fail to parse the sid from " + url, "") def bilibili_download_playlist_by_url(url, **kwargs): url = url_locations([url], faker=True)[0] @@ -403,6 +487,12 @@ elif 'favlist' in url: # this a fav list folder download_video_from_favlist(url, **kwargs) + elif re.match(r'https?://space.bilibili.com/\d+/#/video', url): + download_video_from_totallist(url, 1, **kwargs) + elif re.match(r'https://www.bilibili.com/audio/mycollection/\d+', url): + download_music_from_favlist(url, 1, **kwargs) + elif re.match(r'https?://www.bilibili.com/audio/am\d+\?type=\d', url): + download_music_from_totallist(url, 1, **kwargs) else: aid = re.search(r'av(\d+)', url).group(1) page_list = json.loads(get_content('http://www.bilibili.com/widget/getPageList?aid={}'.format(aid))) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/iwara.py new/you-get-0.4.1181/src/you_get/extractors/iwara.py --- old/you-get-0.4.1167/src/you_get/extractors/iwara.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/iwara.py 2018-11-30 21:51:11.000000000 +0100 @@ -17,20 +17,20 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): global headers - video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)') - video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+') - html = get_content(url,headers=headers) + video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)') + video_url = match1(url, r'(https?://\w+.iwara.tv)/videos/\w+') + html = get_content(url, headers=headers) title = r1(r'<title>(.*)</title>', html) - api_url=video_url+'/api/video/'+video_hash - content=get_content(api_url,headers=headers) - data=json.loads(content) - type,ext,size=url_info(data[0]['uri'], headers=headers) - down_urls=data[0]['uri'] - print_info(down_urls,title+data[0]['resolution'],type,size) + api_url = video_url + '/api/video/' + video_hash + content = get_content(api_url, headers=headers) + data = json.loads(content) + down_urls = 'https:' + data[0]['uri'] + type, ext, size = url_info(down_urls, headers=headers) + print_info(site_info, title+data[0]['resolution'], type, size) if not info_only: - download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers) + download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) -site_info = "iwara" +site_info = "Iwara" download = iwara_download download_playlist = playlist_not_supported('iwara') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/tiktok.py new/you-get-0.4.1181/src/you_get/extractors/tiktok.py --- old/you-get-0.4.1167/src/you_get/extractors/tiktok.py 1970-01-01 01:00:00.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/tiktok.py 2018-11-30 21:51:11.000000000 +0100 @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +__all__ = ['tiktok_download'] + +from ..common import * + +def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + title = r1(r'<title>(.*?)</title>', html) + video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html) + title = '%s [%s]' % (title, video_id) + dataText = r1(r'var data = \[(.*)\] ', html) or r1(r'var data = (\{.*\})', html) + data = json.loads(dataText) + source = 'http:' + data['video']['play_addr']['url_list'][0] + mime, ext, size = url_info(source) + + print_info(site_info, title, mime, size) + if not info_only: + download_urls([source], title, ext, size, output_dir, merge=merge) + +site_info = "TikTok.com" +download = tiktok_download +download_playlist = playlist_not_supported('tiktok') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/youku.py new/you-get-0.4.1181/src/you_get/extractors/youku.py --- old/you-get-0.4.1167/src/you_get/extractors/youku.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/youku.py 2018-11-30 21:51:11.000000000 +0100 @@ -78,7 +78,7 @@ self.api_error_code = None self.api_error_msg = None - self.ccode = '0516' + self.ccode = '0590' # Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/extractors/youtube.py new/you-get-0.4.1181/src/you_get/extractors/youtube.py --- old/you-get-0.4.1167/src/you_get/extractors/youtube.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/extractors/youtube.py 2018-11-30 21:51:11.000000000 +0100 @@ -144,7 +144,10 @@ for video in videos: vid = parse_query_param(video, 'v') index = parse_query_param(video, 'index') - self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) + try: + self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) + except: + pass def prepare(self, **kwargs): assert self.url or self.vid @@ -160,7 +163,8 @@ ytplayer_config = None if 'status' not in video_info: - log.wtf('[Failed] Unknown status.') + log.wtf('[Failed] Unknown status.', exit_code=None) + raise elif video_info['status'] == ['ok']: if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']: self.title = parse.unquote_plus(video_info['title'][0]) @@ -192,7 +196,8 @@ ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) except: msg = re.search('class="message">([^<]+)<', video_page).group(1) - log.wtf('[Failed] "%s"' % msg.strip()) + log.wtf('[Failed] "%s"' % msg.strip(), exit_code=None) + raise if 'title' in ytplayer_config['args']: # 150 Restricted from playback on certain sites @@ -201,18 +206,22 @@ self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') else: - log.wtf('[Error] The uploader has not made this video available in your country.') + log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None) + raise #self.title = re.search('<meta name="title" content="([^"]+)"', video_page).group(1) #stream_list = [] elif video_info['errorcode'] == ['100']: - log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0])) + log.wtf('[Failed] This video does not exist.', exit_code=None) #int(video_info['errorcode'][0]) + raise else: - log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0])) + log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=None) #int(video_info['errorcode'][0]) + raise else: - log.wtf('[Failed] Invalid status.') + log.wtf('[Failed] Invalid status.', exit_code=None) + raise # YouTube Live if ytplayer_config and (ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1'): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1167/src/you_get/version.py new/you-get-0.4.1181/src/you_get/version.py --- old/you-get-0.4.1167/src/you_get/version.py 2018-11-07 16:59:58.000000000 +0100 +++ new/you-get-0.4.1181/src/you_get/version.py 2018-11-30 21:51:11.000000000 +0100 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1167' +__version__ = '0.4.1181'
