Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2018-05-16 11:42:54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Wed May 16 11:42:54 2018 rev:6 rq:607687 version:0.4.1077 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2018-04-17 11:18:46.966852637 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new/you-get.changes 2018-05-16 11:44:10.770651664 +0200 @@ -1,0 +2,5 @@ +Tue May 15 19:47:15 UTC 2018 - [email protected] + +- Update to version 0.4.1077 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1060.tar.gz New: ---- you-get-0.4.1077.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.Umcdup/_old 2018-05-16 11:44:11.446627058 +0200 +++ /var/tmp/diff_new_pack.Umcdup/_new 2018-05-16 11:44:11.450626912 +0200 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1060 +Version: 0.4.1077 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1060.tar.gz -> you-get-0.4.1077.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/common.py new/you-get-0.4.1077/src/you_get/common.py --- old/you-get-0.4.1060/src/you_get/common.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/common.py 2018-05-15 21:22:51.000000000 +0200 @@ -1581,6 +1581,11 @@ domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host assert domain, 'unsupported url: ' + url + # all non-ASCII code points must be quoted (percent-encoded UTF-8) + url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url]) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) + k = r1(r'([^.]+)', domain) if k in SITES: return ( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/bilibili.py new/you-get-0.4.1077/src/you_get/extractors/bilibili.py --- old/you-get-0.4.1060/src/you_get/extractors/bilibili.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/bilibili.py 2018-05-15 21:22:51.000000000 +0200 @@ -28,7 +28,8 @@ live_room_init_api_url = 'https://api.live.bilibili.com/room/v1/Room/room_init?id={}' live_room_info_api_url = 'https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}' - SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + #SEC1 = '1c15888dc316e05a15fdd0a02ed6584f' + SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' stream_types = [ {'id': 'hdflv'}, @@ -44,7 +45,7 @@ @staticmethod def bilibili_stream_type(urls): url = urls[0] - if 'hd.flv' in url or '-112.flv' in url: + if 'hd.flv' in url or '-80.flv' in url: return 'hdflv', 'flv' if '-64.flv' in url: return 'flv720', 'flv' @@ -59,7 +60,8 @@ def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): ts = str(int(time.time())) if not bangumi: - params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + #params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts) + params_str = 'appkey=84956560bc028eb7&cid={}&otype=xml&qn={}&quality={}&type='.format(cid, quality, quality) chksum = hashlib.md5(bytes(params_str+self.SEC1, 'utf8')).hexdigest() api_url = self.api_url + params_str + '&sign=' + chksum else: @@ -97,7 +99,7 @@ quality = 'hdflv' if bangumi else 'flv' info_only = kwargs.get('info_only') - for qlt in range(4, -1, -1): + for qlt in [116,112,80,74,64,32,16,15]: api_xml = self.api_req(cid, qlt, bangumi, **kwargs) self.parse_bili_xml(api_xml) if not info_only or stream_id: @@ -128,6 +130,9 @@ m = re.search(r'<h1.*?>(.*?)</h1>', self.page) or re.search(r'<h1 title="([^"]+)">', self.page) if m is not None: self.title = m.group(1) + s = re.search(r'<span>([^<]+)</span>', m.group(1)) + if s: + self.title = unescape_html(s.group(1)) if self.title is None: m = re.search(r'property="og:title" content="([^"]+)"', self.page) if m is not None: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/douyin.py new/you-get-0.4.1077/src/you_get/extractors/douyin.py --- old/you-get-0.4.1060/src/you_get/extractors/douyin.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/douyin.py 2018-05-15 21:22:51.000000000 +0200 @@ -7,6 +7,7 @@ url_size, print_info, get_content, + fake_headers, download_urls, playlist_not_supported, ) @@ -16,13 +17,19 @@ def douyin_download_by_url(url, **kwargs): - page_content = get_content(url) + page_content = get_content(url, headers=fake_headers) match_rule = re.compile(r'var data = \[(.*?)\];') video_info = json.loads(match_rule.findall(page_content)[0]) video_url = video_info['video']['play_addr']['url_list'][0] - title = video_info['cha_list'][0]['cha_name'] + # fix: https://www.douyin.com/share/video/6553248251821165832 + # if there is no title, use desc + cha_list = video_info['cha_list'] + if cha_list: + title = cha_list[0]['cha_name'] + else: + title = video_info['desc'] video_format = 'mp4' - size = url_size(video_url) + size = url_size(video_url, faker=True) print_info( site_info='douyin.com', title=title, type=video_format, size=size @@ -30,6 +37,7 @@ if not kwargs['info_only']: download_urls( urls=[video_url], title=title, ext=video_format, total_size=size, + faker=True, **kwargs ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/douyutv.py new/you-get-0.4.1077/src/you_get/extractors/douyutv.py --- old/you-get-0.4.1060/src/you_get/extractors/douyutv.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/douyutv.py 2018-05-15 21:22:51.000000000 +0200 @@ -9,6 +9,10 @@ import time import re +headers = { + 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' + } + def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **kwargs): ep = 'http://vmobile.douyu.com/video/getInfo?vid=' patt = r'show/([0-9A-Za-z]+)' @@ -19,7 +23,7 @@ log.wtf('Unknown url pattern') vid = hit.group(1) - page = get_content(url) + page = get_content(url, headers=headers) hit = re.search(title_patt, page) if hit is None: title = vid @@ -35,21 +39,18 @@ urls = general_m3u8_extractor(m3u8_url) download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) -def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + +def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if 'v.douyu.com/show/' in url: douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) return - headers = { - 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' - } - - url = re.sub(r'[w.]*douyu.com','m.douyu.com',url) + url = re.sub(r'[w.]*douyu.com', 'm.douyu.com', url) html = get_content(url, headers) room_id_patt = r'room_id\s*:\s*(\d+),' room_id = match1(html, room_id_patt) if room_id == "0": - room_id = url[url.rfind('/')+1:] + room_id = url[url.rfind('/') + 1:] api_url = "http://www.douyutv.com/api/v1/" args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) @@ -60,7 +61,7 @@ content = get_content(json_request_url, headers) json_content = json.loads(content) data = json_content['data'] - server_status = json_content.get('error',0) + server_status = json_content.get('error', 0) if server_status is not 0: raise ValueError("Server returned error:%s" % server_status) @@ -73,7 +74,8 @@ print_info(site_info, title, 'flv', float('inf')) if not info_only: - download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir = output_dir, merge = merge) + download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) + site_info = "douyu.com" download = douyutv_download diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/ixigua.py new/you-get-0.4.1077/src/you_get/extractors/ixigua.py --- old/you-get-0.4.1060/src/you_get/extractors/ixigua.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/ixigua.py 2018-05-15 21:22:51.000000000 +0200 @@ -1,101 +1,13 @@ #!/usr/bin/env python __all__ = ['ixigua_download', 'ixigua_download_playlist'] -import base64 -import random -import binascii -from ..common import * +from .toutiao import download as toutiao_download +from .toutiao import download_playlist as toutiao_download_playlist -headers = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36' - ' (KHTML, like Gecko) Chrome/61.0.3163.100 Mobile Safari/537.36' -} - -def get_r(): - return str(random.random())[2:] - - -def right_shift(val, n): - return val >> n if val >= 0 else (val + 0x100000000) >> n - - -def get_s(text): - """get video info""" - js_data = json.loads(text) - id = js_data['data']['video_id'] - p = get_r() - url = 'http://i.snssdk.com/video/urls/v/1/toutiao/mp4/%s' % id - n = parse.urlparse(url).path + '?r=%s' % p - c = binascii.crc32(n.encode('utf-8')) - s = right_shift(c, 0) - return url + '?r=%s&s=%s' % (p, s), js_data['data']['title'] - - -def get_moment(url, user_id, base_url, video_list): - """Recursively obtaining a video list""" - video_list_data = json.loads(get_content(url, headers=headers)) - if not video_list_data['next']['max_behot_time']: - return video_list - [video_list.append(i["display_url"]) for i in video_list_data["data"]] - max_behot_time = video_list_data['next']['max_behot_time'] - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': video_list, - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - return get_moment(**_param) - - -def ixigua_download(url, output_dir='.', info_only=False, **kwargs): - """ Download a single video - Sample URL: https://www.ixigua.com/a6487187567887254029/#mid=59051127876 - """ - try: - video_page_id = re.findall('(\d+)', [i for i in url.split('/') if i][3])[0] if 'toutiao.com' in url \ - else re.findall('(\d+)', [i for i in url.split('/') if i][2])[0] - - video_start_info_url = r'https://m.ixigua.com/i{}/info/'.format(video_page_id) - video_info_url, title = get_s(get_content(video_start_info_url, headers=headers or kwargs.get('headers', {}))) - video_info = json.loads(get_content(video_info_url, headers=headers or kwargs.get('headers', {}))) - except Exception: - raise NotImplementedError(url) - try: - video_url = base64.b64decode(video_info["data"]["video_list"]["video_1"]["main_url"]).decode() - except Exception: - raise NotImplementedError(url) - filetype, ext, size = url_info(video_url, headers=headers or kwargs.get('headers', {})) - print_info(site_info, title, filetype, size) - if not info_only: - _param = { - 'output_dir': output_dir, - 'headers': headers or kwargs.get('headers', {}) - } - download_urls([video_url], title, ext, size, **_param) - - -def ixigua_download_playlist(url, output_dir='.', info_only=False, **kwargs): - """Download all video from the user's video list - Sample URL: https://www.ixigua.com/c/user/71141690831/ - """ - if 'user' not in url: - raise NotImplementedError(url) - user_id = url.split('/')[-2] - max_behot_time = 0 - if not user_id: - raise NotImplementedError(url) - base_url = "https://www.ixigua.com/c/user/article/?user_id={user_id}" \ - "&max_behot_time={max_behot_time}&max_repin_time=0&count=20&page_type=0" - _param = { - 'user_id': user_id, - 'base_url': base_url, - 'video_list': [], - 'url': base_url.format(user_id=user_id, max_behot_time=max_behot_time), - } - for i in get_moment(**_param): - ixigua_download(i, output_dir, info_only, **kwargs) +def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + return toutiao_download(url.replace('ixigua', '365yg')) site_info = "ixigua.com" download = ixigua_download -download_playlist = ixigua_download_playlist +download_playlist = toutiao_download_playlist \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/qq.py new/you-get-0.4.1077/src/you_get/extractors/qq.py --- old/you-get-0.4.1060/src/you_get/extractors/qq.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/qq.py 2018-05-15 21:22:51.000000000 +0200 @@ -18,11 +18,14 @@ host = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] streams = video_json['fl']['fi'] seg_cnt = video_json['vl']['vi'][0]['cl']['fc'] + filename = video_json['vl']['vi'][0]['fn'] if seg_cnt == 0: seg_cnt = 1 + else: + fn_pre, magic_str, video_type = filename.split('.') best_quality = streams[-1]['name'] - part_format_id = streams[-1]['id'] + #part_format_id = streams[-1]['id'] part_urls= [] total_size = 0 @@ -31,7 +34,17 @@ # filename = fn_pre + '.mp4' #else: # filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' - filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + #filename = fn_pre + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4' + + # fix some error cases("check vid&filename failed" and "format invalid") + # https://v.qq.com/x/page/q06058th9ll.html + # https://v.qq.com/x/page/t060789a21e.html + if seg_cnt == 1: + part_format_id = video_json['vl']['vi'][0]['cl']['keyid'].split('.')[-1] + else: + part_format_id = video_json['vl']['vi'][0]['cl']['ci'][part - 1]['keyid'].split('.')[1] + filename = '.'.join([fn_pre, magic_str, str(part), video_type]) + key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename) part_info = get_content(key_api) key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/tumblr.py new/you-get-0.4.1077/src/you_get/extractors/tumblr.py --- old/you-get-0.4.1060/src/you_get/extractors/tumblr.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/tumblr.py 2018-05-15 21:22:51.000000000 +0200 @@ -70,6 +70,11 @@ real_url = r1(r'<source src="([^"]*)"', html) if not real_url: iframe_url = r1(r'<[^>]+tumblr_video_container[^>]+><iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html) + + if iframe_url is None: + universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) + return + if iframe_url: iframe_html = get_content(iframe_url, headers=fake_headers) real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/twitter.py new/you-get-0.4.1077/src/you_get/extractors/twitter.py --- old/you-get-0.4.1060/src/you_get/extractors/twitter.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/twitter.py 2018-05-15 21:22:51.000000000 +0200 @@ -18,6 +18,17 @@ if re.match(r'https?://mobile', url): # normalize mobile URL url = 'https://' + match1(url, r'//mobile\.(.+)') + if re.match(r'https?://twitter\.com/i/moments/', url): # moments + html = get_html(url) + paths = re.findall(r'data-permalink-path="([^"]+)"', html) + for path in paths: + twitter_download('https://twitter.com' + path, + output_dir=output_dir, + merge=merge, + info_only=info_only, + **kwargs) + return + html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ r1(r'<meta name="twitter:title" content="([^"]*)"', html) @@ -58,7 +69,10 @@ url = r1(r'<meta\s*property="og:video:url"\s*content="([^"]+)"', html) if not url: url = 'https://twitter.com/i/videos/%s' % item_id - html = get_content(url) + try: + html = get_content(url) + except: + return data_config = r1(r'data-config="([^"]*)"', html) or \ r1(r'data-player-config="([^"]*)"', html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/universal.py new/you-get-0.4.1077/src/you_get/extractors/universal.py --- old/you-get-0.4.1060/src/you_get/extractors/universal.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/universal.py 2018-05-15 21:22:51.000000000 +0200 @@ -111,16 +111,25 @@ for candy in candies: try: - mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Int') + try: + mime, ext, size = url_info(candy['url'], faker=False) + assert size + except: + mime, ext, size = url_info(candy['url'], faker=True) + if not size: size = float('Inf') except: continue else: print_info(site_info, candy['title'], ext, size) if not info_only: - download_urls([candy['url']], candy['title'], ext, size, - output_dir=output_dir, merge=merge, - faker=True) + try: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=False) + except: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=True) return else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/extractors/youku.py new/you-get-0.4.1077/src/you_get/extractors/youku.py --- old/you-get-0.4.1060/src/you_get/extractors/youku.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/extractors/youku.py 2018-05-15 21:22:51.000000000 +0200 @@ -78,7 +78,7 @@ self.api_error_code = None self.api_error_msg = None - self.ccode = '0502' + self.ccode = '0510' # Found in http://g.alicdn.com/player/ykplayer/0.5.28/youku-player.min.js # grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1060/src/you_get/version.py new/you-get-0.4.1077/src/you_get/version.py --- old/you-get-0.4.1060/src/you_get/version.py 2018-04-15 17:13:08.000000000 +0200 +++ new/you-get-0.4.1077/src/you_get/version.py 2018-05-15 21:22:51.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1060' +__version__ = '0.4.1077'
