Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2020-10-28 10:00:23 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.3463 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Wed Oct 28 10:00:23 2020 rev:33 rq:844456 version:0.4.1475 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2020-07-20 21:03:06.521198610 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new.3463/you-get.changes 2020-10-28 10:01:13.143263839 +0100 @@ -1,0 +2,5 @@ +Tue Oct 27 19:26:44 UTC 2020 - [email protected] + +- Update to version 0.4.1475 (no changelog supplied) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1456.tar.gz New: ---- you-get-0.4.1475.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.PYtPE6/_old 2020-10-28 10:01:16.319266061 +0100 +++ /var/tmp/diff_new_pack.PYtPE6/_new 2020-10-28 10:01:16.323266064 +0100 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1456 +Version: 0.4.1475 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1456.tar.gz -> you-get-0.4.1475.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/common.py new/you-get-0.4.1475/src/you_get/common.py --- old/you-get-0.4.1456/src/you_get/common.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/common.py 2020-10-27 18:19:28.000000000 +0100 @@ -1422,12 +1422,27 @@ def set_socks_proxy(proxy): try: import socks - socks_proxy_addrs = proxy.split(':') - socks.set_default_proxy( - socks.SOCKS5, - socks_proxy_addrs[0], - int(socks_proxy_addrs[1]) - ) + if '@' in proxy: + proxy_info = proxy.split("@") + socks_proxy_addrs = proxy_info[1].split(':') + socks_proxy_auth = proxy_info[0].split(":") + print(socks_proxy_auth[0]+" "+socks_proxy_auth[1]+" "+socks_proxy_addrs[0]+" "+socks_proxy_addrs[1]) + socks.set_default_proxy( + socks.SOCKS5, + socks_proxy_addrs[0], + int(socks_proxy_addrs[1]), + True, + socks_proxy_auth[0], + socks_proxy_auth[1] + ) + else: + socks_proxy_addrs = proxy.split(':') + print(socks_proxy_addrs[0]+" "+socks_proxy_addrs[1]) + socks.set_default_proxy( + socks.SOCKS5, + socks_proxy_addrs[0], + int(socks_proxy_addrs[1]), + ) socket.socket = socks.socksocket def getaddrinfo(*args): @@ -1565,7 +1580,7 @@ '--no-proxy', action='store_true', help='Never use a proxy' ) proxy_grp.add_argument( - '-s', '--socks-proxy', metavar='HOST:PORT', + '-s', '--socks-proxy', metavar='HOST:PORT or USERNAME:PASSWORD@HOST:PORT', help='Use an SOCKS5 proxy for downloading' ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/acfun.py new/you-get-0.4.1475/src/you_get/extractors/acfun.py --- old/you-get-0.4.1456/src/you_get/extractors/acfun.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/acfun.py 2020-10-27 18:19:28.000000000 +0100 @@ -1,175 +1,213 @@ #!/usr/bin/env python -__all__ = ['acfun_download'] - from ..common import * +from ..extractor import VideoExtractor + +class AcFun(VideoExtractor): + name = "AcFun" + + stream_types = [ + {'id': '2160P', 'qualityType': '2160p'}, + {'id': '1080P60', 'qualityType': '1080p60'}, + {'id': '720P60', 'qualityType': '720p60'}, + {'id': '1080P+', 'qualityType': '1080p+'}, + {'id': '1080P', 'qualityType': '1080p'}, + {'id': '720P', 'qualityType': '720p'}, + {'id': '540P', 'qualityType': '540p'}, + {'id': '360P', 'qualityType': '360p'} + ] + + def prepare(self, **kwargs): + assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url) + + if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', self.url): + html = get_content(self.url, headers=fake_headers) + json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});") + json_data = json.loads(json_text) + vid = json_data.get('currentVideoInfo').get('id') + up = json_data.get('user').get('name') + self.title = json_data.get('title') + video_list = json_data.get('videoList') + if len(video_list) > 1: + self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] + currentVideoInfo = json_data.get('currentVideoInfo') + + elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url): + html = get_content(self.url, headers=fake_headers) + tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>') + json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] + json_data = json.loads(json_text) + self.title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title'] + vid = str(json_data['videoId']) + up = "acfun" + currentVideoInfo = json_data.get('currentVideoInfo') -from .le import letvcloud_download_by_vu -from .qq import qq_download_by_vid -from .sina import sina_download_by_vid -from .tudou import tudou_download_by_iid -from .youku import youku_download_by_vid - -import json -import re -import base64 -import time - -def get_srt_json(id): - url = 'http://danmu.aixifan.com/V2/%s' % id - return get_content(url) - -def youku_acfun_proxy(vid, sign, ref): - endpoint = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}' - url = endpoint.format(vid, sign, str(int(time.time() * 1000))) - json_data = json.loads(get_content(url, headers=dict(referer=ref)))['data'] - enc_text = base64.b64decode(json_data) - dec_text = rc4(b'8bdc7e1a', enc_text).decode('utf8') - youku_json = json.loads(dec_text) - - yk_streams = {} - for stream in youku_json['stream']: - tp = stream['stream_type'] - yk_streams[tp] = [], stream['total_size'] - if stream.get('segs'): - for seg in stream['segs']: - yk_streams[tp][0].append(seg['url']) else: - yk_streams[tp] = stream['m3u8'], stream['total_size'] + raise NotImplemented - return yk_streams + if 'ksPlayJson' in currentVideoInfo: + durationMillis = currentVideoInfo['durationMillis'] + ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] ) + representation = ksPlayJson.get('adaptationSet')[0].get('representation') + stream_list = representation + + for stream in stream_list: + m3u8_url = stream["url"] + size = durationMillis * stream["avgBitrate"] / 8 + # size = float('inf') + container = 'mp4' + stream_id = stream["qualityLabel"] + quality = stream["qualityType"] + + stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality) + self.streams[stream_id] = stream_data + + assert self.title and m3u8_url + self.title = unescape_html(self.title) + self.title = escape_file_path(self.title) + p_title = r1('active">([^<]+)', html) + self.title = '%s (%s)' % (self.title, up) + if p_title: + self.title = '%s - %s' % (self.title, p_title) + + + def download(self, **kwargs): + if 'json_output' in kwargs and kwargs['json_output']: + json_output.output(self) + elif 'info_only' in kwargs and kwargs['info_only']: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Display the stream + stream_id = kwargs['stream_id'] + if 'index' not in kwargs: + self.p(stream_id) + else: + self.p_i(stream_id) + else: + # Display all available streams + if 'index' not in kwargs: + self.p([]) + else: + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + self.p_i(stream_id) -def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs): - """str, str, str, bool, bool ->None + else: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Download the stream + stream_id = kwargs['stream_id'] + else: + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + + if 'index' not in kwargs: + self.p(stream_id) + else: + self.p_i(stream_id) + if stream_id in self.streams: + url = self.streams[stream_id]['src'] + ext = self.streams[stream_id]['container'] + total_size = self.streams[stream_id]['size'] + + + if ext == 'm3u8' or ext == 'm4a': + ext = 'mp4' + + if not url: + log.wtf('[Failed] Cannot extract video source.') + # For legacy main() + headers = {} + if self.ua is not None: + headers['User-Agent'] = self.ua + if self.referer is not None: + headers['Referer'] = self.referer + + download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge']) + + if 'caption' not in kwargs or not kwargs['caption']: + print('Skipping captions or danmaku.') + return + + for lang in self.caption_tracks: + filename = '%s.%s.srt' % (get_filename(self.title), lang) + print('Saving %s ... ' % filename, end="", flush=True) + srt = self.caption_tracks[lang] + with open(os.path.join(kwargs['output_dir'], filename), + 'w', encoding='utf-8') as x: + x.write(srt) + print('Done.') + + if self.danmaku is not None and not dry_run: + filename = '{}.cmt.xml'.format(get_filename(self.title)) + print('Downloading {} ...\n'.format(filename)) + with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp: + fp.write(self.danmaku) + + if self.lyrics is not None and not dry_run: + filename = '{}.lrc'.format(get_filename(self.title)) + print('Downloading {} ...\n'.format(filename)) + with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp: + fp.write(self.lyrics) + + # For main_dev() + #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size']) + keep_obj = kwargs.get('keep_obj', False) + if not keep_obj: + self.__init__() + + + def acfun_download(self, url, output_dir='.', merge=True, info_only=False, **kwargs): + assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url) + + def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo): + if 'playInfos' in currentVideoInfo: + return currentVideoInfo['playInfos'][0]['playUrls'][0] + elif 'ksPlayJson' in currentVideoInfo: + ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] ) + representation = ksPlayJson.get('adaptationSet')[0].get('representation') + reps = [] + for one in representation: + reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) ) + return max(reps)[1] + + + if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url): + html = get_content(url, headers=fake_headers) + json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});") + json_data = json.loads(json_text) + vid = json_data.get('currentVideoInfo').get('id') + up = json_data.get('user').get('name') + title = json_data.get('title') + video_list = json_data.get('videoList') + if len(video_list) > 1: + title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] + currentVideoInfo = json_data.get('currentVideoInfo') + m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo) + elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url): + html = get_content(url, headers=fake_headers) + tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>') + json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] + json_data = json.loads(json_text) + title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title'] + vid = str(json_data['videoId']) + up = "acfun" - Download Acfun video by vid. + currentVideoInfo = json_data.get('currentVideoInfo') + m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo) - Call Acfun API, decide which site to use, and pass the job to its - extractor. - """ - - #first call the main parasing API - info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers)) - - sourceType = info['sourceType'] - - #decide sourceId to know which extractor to use - if 'sourceId' in info: sourceId = info['sourceId'] - # danmakuId = info['danmakuId'] - - #call extractor decided by sourceId - if sourceType == 'sina': - sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'youku': - youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) - elif sourceType == 'tudou': - tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'qq': - qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'letv': - letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only) - elif sourceType == 'zhuzhan': - #As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this -#In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player -#old code removed - url = 'http://www.acfun.cn/v/ac' + vid - yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url) - seq = ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd'] - for t in seq: - if yk_streams.get(t): - preferred = yk_streams[t] - break -#total_size in the json could be incorrect(F.I. 0) - size = 0 - for url in preferred[0]: - _, _, seg_size = url_info(url) - size += seg_size -#fallback to flvhd is not quite possible - if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]): - ext = 'flv' else: - ext = 'mp4' - print_info(site_info, title, ext, size) - if not info_only: - download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge) - else: - raise NotImplementedError(sourceType) - - if not info_only and not dry_run: - if not kwargs['caption']: - print('Skipping danmaku.') - return - try: - title = get_filename(title) - print('Downloading %s ...\n' % (title + '.cmt.json')) - cmt = get_srt_json(vid) - with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x: - x.write(cmt) - except: - pass - -def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url) - - if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url): - html = get_content(url, headers=fake_headers) - json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});") - json_data = json.loads(json_text) - vid = json_data.get('currentVideoInfo').get('id') - up = json_data.get('user').get('name') - title = json_data.get('title') - video_list = json_data.get('videoList') - if len(video_list) > 1: - title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] - currentVideoInfo = json_data.get('currentVideoInfo') - if 'playInfos' in currentVideoInfo: - m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0] - elif 'ksPlayJson' in currentVideoInfo: - ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] ) - representation = ksPlayJson.get('adaptationSet').get('representation') - reps = [] - for one in representation: - reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) ) - m3u8_url = max(reps)[1] - - elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url): - html = get_content(url, headers=fake_headers) - tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>') - json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] - json_data = json.loads(json_text) - title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title'] - vid = str(json_data['videoId']) - up = "acfun" - - play_info = get_content("https://www.acfun.cn/rest/pc-direct/play/playInfo/m3u8Auto?videoId=" + vid, headers=fake_headers) - play_url = json.loads(play_info)['playInfo']['streams'][0]['playUrls'][0] - m3u8_all_qualities_file = get_content(play_url) - m3u8_all_qualities_lines = m3u8_all_qualities_file.split('#EXT-X-STREAM-INF:')[1:] - highest_quality_line = m3u8_all_qualities_lines[0] - for line in m3u8_all_qualities_lines: - bandwith = int(match1(line, r'BANDWIDTH=(\d+)')) - if bandwith > int(match1(highest_quality_line, r'BANDWIDTH=(\d+)')): - highest_quality_line = line - #TODO: 应由用户指定清晰度 - m3u8_url = match1(highest_quality_line, r'\n([^#\n]+)$') - m3u8_url = play_url[:play_url.rfind("/")+1] + m3u8_url - - else: - raise NotImplemented - - assert title and m3u8_url - title = unescape_html(title) - title = escape_file_path(title) - p_title = r1('active">([^<]+)', html) - title = '%s (%s)' % (title, up) - if p_title: - title = '%s - %s' % (title, p_title) - - print_info(site_info, title, 'm3u8', float('inf')) - if not info_only: - download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge) + raise NotImplemented + + assert title and m3u8_url + title = unescape_html(title) + title = escape_file_path(title) + p_title = r1('active">([^<]+)', html) + title = '%s (%s)' % (title, up) + if p_title: + title = '%s - %s' % (title, p_title) + print_info(site_info, title, 'm3u8', float('inf')) + if not info_only: + download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge) +site = AcFun() site_info = "AcFun.cn" -download = acfun_download +download = site.download_by_url download_playlist = playlist_not_supported('acfun') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/bilibili.py new/you-get-0.4.1475/src/you_get/extractors/bilibili.py --- old/you-get-0.4.1456/src/you_get/extractors/bilibili.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/bilibili.py 2020-10-27 18:19:28.000000000 +0100 @@ -62,7 +62,7 @@ @staticmethod def bilibili_api(avid, cid, qn=0): - return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16' % (avid, cid, qn) + return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16&fourk=1' % (avid, cid, qn) @staticmethod def bilibili_audio_api(sid): @@ -114,7 +114,7 @@ @staticmethod def bilibili_space_video_api(mid, pn=1, ps=100): - return 'https://space.bilibili.com/ajax/member/getSubmitVideos?mid=%s&page=%s&pagesize=%s&order=0&jsonp=jsonp' % (mid, pn, ps) + return "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%s&ps=%s&tid=0&keyword=&order=pubdate&jsonp=jsonp" % (mid, pn, ps) @staticmethod def bilibili_vc_api(video_id): @@ -734,15 +734,15 @@ api_url = self.bilibili_space_video_api(mid) api_content = get_content(api_url, headers=self.bilibili_headers()) videos_info = json.loads(api_content) - pc = videos_info['data']['pages'] + pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps'] for pn in range(1, pc + 1): api_url = self.bilibili_space_video_api(mid, pn=pn) api_content = get_content(api_url, headers=self.bilibili_headers()) videos_info = json.loads(api_content) - epn, i = len(videos_info['data']['vlist']), 0 - for video in videos_info['data']['vlist']: + epn, i = len(videos_info['data']['list']['vlist']), 0 + for video in videos_info['data']['list']['vlist']: i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) url = 'https://www.bilibili.com/video/av%s' % video['aid'] self.__class__().download_playlist_by_url(url, **kwargs) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/imgur.py new/you-get-0.4.1475/src/you_get/extractors/imgur.py --- old/you-get-0.4.1456/src/you_get/extractors/imgur.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/imgur.py 2020-10-27 18:19:28.000000000 +0100 @@ -52,20 +52,16 @@ else: # gallery image content = get_content(self.url) - image = json.loads(match1(content, r'image\s*:\s*({.*}),')) - ext = image['ext'] + url = match1(content, r'(https?://i.imgur.com/[^"]+)') + _, container, size = url_info(url) self.streams = { 'original': { - 'src': ['http://i.imgur.com/%s%s' % (image['hash'], ext)], - 'size': image['size'], - 'container': ext[1:] - }, - 'thumbnail': { - 'src': ['http://i.imgur.com/%ss%s' % (image['hash'], '.jpg')], - 'container': 'jpg' + 'src': [url], + 'size': size, + 'container': container } } - self.title = image['title'] or image['hash'] + self.title = r1(r'i\.imgur\.com/([^./]*)', url) def extract(self, **kwargs): if 'stream_id' in kwargs and kwargs['stream_id']: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/instagram.py new/you-get-0.4.1475/src/you_get/extractors/instagram.py --- old/you-get-0.4.1456/src/you_get/extractors/instagram.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/instagram.py 2020-10-27 18:19:28.000000000 +0100 @@ -8,7 +8,7 @@ url = r1(r'([^?]*)', url) html = get_html(url) - vid = r1(r'instagram.com/p/([^/]+)', url) + vid = r1(r'instagram.com/\w+/([^/]+)', url) description = r1(r'<meta property="og:title" content="([^"]*)"', html) title = "{} [{}]".format(description.replace("\n", " "), vid) stream = r1(r'<meta property="og:video" content="([^"]*)"', html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/iqiyi.py new/you-get-0.4.1475/src/you_get/extractors/iqiyi.py --- old/you-get-0.4.1456/src/you_get/extractors/iqiyi.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/iqiyi.py 2020-10-27 18:19:28.000000000 +0100 @@ -20,7 +20,7 @@ use @fffonion 's method in #617. Add trace AVM(asasm) code in Iqiyi's encode function where the salt is put into the encode array and reassemble by RABCDasm(or WinRABCDasm),then use Fiddler to response modified file to replace the src file with its AutoResponder function ,set browser Fiddler proxy and play with !debug version! Flash Player ,finially get result in flashlog.txt(its location can be easily found in search engine). Code Like (without letters after #comment:),it just do the job : trace("{IQIYI_SALT}:"+salt_array.join("")) - ```(Postion After getTimer) + ```(Position After getTimer) findpropstrict QName(PackageNamespace(""), "trace") pushstring "{IQIYI_SALT}:" #comment for you to locate the salt getscopeobject 1 @@ -119,10 +119,10 @@ self.url = url video_page = get_content(url) - videos = set(re.findall(r'<a href="(http://www\.iqiyi\.com/v_[^"]+)"', video_page)) + videos = set(re.findall(r'<a href="(?=https?:)?(//www\.iqiyi\.com/v_[^"]+)"', video_page)) for video in videos: - self.__class__().download_by_url(video, **kwargs) + self.__class__().download_by_url('https:' + video, **kwargs) def prepare(self, **kwargs): assert self.url or self.vid @@ -153,7 +153,7 @@ except Exception as e: log.i("vd: {} is not handled".format(stream['vd'])) log.i("info is {}".format(stream)) - + def download(self, **kwargs): """Override the original one @@ -201,7 +201,7 @@ if not urls: log.wtf('[Failed] Cannot extract video source.') # For legacy main() - + #Here's the change!! download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False) @@ -215,7 +215,7 @@ with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf-8') as x: x.write(srt) - print('Done.') + print('Done.') ''' if info["code"] != "A000000": diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/tiktok.py new/you-get-0.4.1475/src/you_get/extractors/tiktok.py --- old/you-get-0.4.1456/src/you_get/extractors/tiktok.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/tiktok.py 2020-10-27 18:19:28.000000000 +0100 @@ -5,16 +5,42 @@ from ..common import * def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - html = get_html(url, faker=True) - title = r1(r'<title.*?>(.*?)</title>', html) - video_id = r1(r'/video/(\d+)', url) or r1(r'musical\?id=(\d+)', html) - title = '%s [%s]' % (title, video_id) - source = r1(r'<video .*?src="([^"]+)"', html) or r1(r'"contentUrl":"([^"]+)"', html) - mime, ext, size = url_info(source) + referUrl = url.split('?')[0] + headers = fake_headers + + # trick or treat + html = get_content(url, headers=headers) + data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html) + info = json.loads(data) + wid = info['props']['initialProps']['$wid'] + cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid) + + # here's the cookie + headers['Cookie'] = cookie + + # try again + html = get_content(url, headers=headers) + data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html) + info = json.loads(data) + wid = info['props']['initialProps']['$wid'] + cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid) + + videoData = info['props']['pageProps']['itemInfo']['itemStruct'] + videoId = videoData['id'] + videoUrl = videoData['video']['downloadAddr'] + uniqueId = videoData['author'].get('uniqueId') + nickName = videoData['author'].get('nickname') + + title = '%s [%s]' % (nickName or uniqueId, videoId) + + # we also need the referer + headers['Referer'] = referUrl + + mime, ext, size = url_info(videoUrl, headers=headers) print_info(site_info, title, mime, size) if not info_only: - download_urls([source], title, ext, size, output_dir, merge=merge) + download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) site_info = "TikTok.com" download = tiktok_download diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/extractors/youtube.py new/you-get-0.4.1475/src/you_get/extractors/youtube.py --- old/you-get-0.4.1456/src/you_get/extractors/youtube.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/extractors/youtube.py 2020-10-27 18:19:28.000000000 +0100 @@ -157,34 +157,22 @@ log.wtf('[Failed] Unsupported URL pattern.') video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id) - from html.parser import HTMLParser - videos = sorted([HTMLParser().unescape(video) - for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page) - if parse_query_param(video, 'index')], - key=lambda video: parse_query_param(video, 'index')) + ytInitialData = json.loads(match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);')) - # Parse browse_ajax page for more videos to load - load_more_href = match1(video_page, r'data-uix-load-more-href="([^"]+)"') - while load_more_href: - browse_ajax = get_content('https://www.youtube.com/%s' % load_more_href) - browse_data = json.loads(browse_ajax) - load_more_widget_html = browse_data['load_more_widget_html'] - content_html = browse_data['content_html'] - vs = set(re.findall(r'href="(/watch\?[^"]+)"', content_html)) - videos += sorted([HTMLParser().unescape(video) - for video in list(vs) - if parse_query_param(video, 'index')]) - load_more_href = match1(load_more_widget_html, r'data-uix-load-more-href="([^"]+)"') + tab0 = ytInitialData['contents']['twoColumnBrowseResultsRenderer']['tabs'][0] + itemSection0 = tab0['tabRenderer']['content']['sectionListRenderer']['contents'][0] + playlistVideoList0 = itemSection0['itemSectionRenderer']['contents'][0] + videos = playlistVideoList0['playlistVideoListRenderer']['contents'] self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1) self.p_playlist() - for video in videos: - vid = parse_query_param(video, 'v') - index = parse_query_param(video, 'index') + for index, video in enumerate(videos, 1): + vid = video['playlistVideoRenderer']['videoId'] try: self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) except: pass + # FIXME: show DASH stream sizes (by default) for playlist videos def prepare(self, **kwargs): assert self.url or self.vid diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/src/you_get/version.py new/you-get-0.4.1475/src/you_get/version.py --- old/you-get-0.4.1456/src/you_get/version.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/src/you_get/version.py 2020-10-27 18:19:28.000000000 +0100 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1456' +__version__ = '0.4.1475' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1456/tests/test.py new/you-get-0.4.1475/tests/test.py --- old/you-get-0.4.1456/tests/test.py 2020-07-19 16:17:28.000000000 +0200 +++ new/you-get-0.4.1475/tests/test.py 2020-10-27 18:19:28.000000000 +0100 @@ -9,7 +9,8 @@ missevan, acfun, bilibili, - soundcloud + soundcloud, + tiktok ) @@ -53,9 +54,15 @@ 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True ) ## playlist - soundcloud.download( - 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True - ) + #soundcloud.download( + # 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True + #) + + def tests_tiktok(self): + tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True) + tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True) + tiktok.download('https://vt.tiktok.com/UGJR4R/', info_only=True) + if __name__ == '__main__': unittest.main()
