Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2022-04-23 19:46:58
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new.1538 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Sat Apr 23 19:46:58 2022 rev:42 rq:972262 version:0.4.1602 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2021-11-04 16:10:39.721085115 +0100 +++ /work/SRC/openSUSE:Factory/.you-get.new.1538/you-get.changes 2022-04-23 19:49:07.971209223 +0200 @@ -1,0 +2,5 @@ +Sat Apr 23 05:09:47 UTC 2022 - Luigi Baldoni <[email protected]> + +- Update to version 0.4.1602 (no changelog) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1555.tar.gz New: ---- you-get-0.4.1602.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.VCOINh/_old 2022-04-23 19:49:08.387209717 +0200 +++ /var/tmp/diff_new_pack.VCOINh/_new 2022-04-23 19:49:08.395209727 +0200 @@ -1,7 +1,7 @@ # # spec file for package you-get # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1555 +Version: 0.4.1602 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1555.tar.gz -> you-get-0.4.1602.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/README.md new/you-get-0.4.1602/README.md --- old/you-get-0.4.1555/README.md 2021-11-03 18:32:35.000000000 +0100 +++ new/you-get-0.4.1602/README.md 2022-04-22 23:44:08.000000000 +0200 @@ -55,7 +55,7 @@ The following dependencies are necessary: -* **[Python](https://www.python.org/downloads/)** 3.2 or above +* **[Python](https://www.python.org/downloads/)** 3.5 or above * **[FFmpeg](https://www.ffmpeg.org/)** 1.0 or above * (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/) @@ -89,6 +89,14 @@ to install `you-get` to a permanent path. +You can also use the [pipenv](https://pipenv.pypa.io/en/latest) to install the `you-get` in the Python virtual environment. + +``` +$ pipenv install -e . +$ pipenv run you-get --version +you-get: version 0.4.1555, a tiny downloader that scrapes the web. +``` + ### Option 4: Git clone This is the recommended way for all developers, even if you don't often code in Python. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/common.py new/you-get-0.4.1602/src/you_get/common.py
--- old/you-get-0.4.1555/src/you_get/common.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/common.py	2022-04-22 23:44:08.000000000 +0200
@@ -136,6 +136,8 @@
 output_filename = None
 auto_rename = False
 insecure = False
+m3u8 = False
+postfix = False
 
 fake_headers = {
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  # noqa
@@ -340,10 +342,32 @@
     return decompressobj.decompress(data)+decompressobj.flush()
 
 
+# an http.client implementation of get_content()
+# because urllib does not support "Connection: keep-alive"
+def getHttps(host, url, headers, debuglevel=0):
+    import http.client
+
+    conn = http.client.HTTPSConnection(host)
+    conn.set_debuglevel(debuglevel)
+    conn.request("GET", url, headers=headers)
+    resp = conn.getresponse()
+
+    data = resp.read()
+    data = ungzip(data)
+    #data = undeflate(data)
+
+    return str(data, encoding='utf-8')
+
+
 # DEPRECATED in favor of get_content()
 def get_response(url, faker=False):
     logging.debug('get_response: %s' % url)
-
+    ctx = None
+    if insecure:
+        # ignore ssl errors
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
     # install cookies
     if cookies:
         opener = request.build_opener(request.HTTPCookieProcessor(cookies))
@@ -351,10 +375,10 @@
 
     if faker:
         response = request.urlopen(
-            request.Request(url, headers=fake_headers), None
+            request.Request(url, headers=fake_headers), None, context=ctx,
        )
     else:
-        response = request.urlopen(url)
+        response = request.urlopen(url, context=ctx)
 
     data = response.read()
     if response.info().get('Content-Encoding') == 'gzip':
@@ -983,6 +1007,8 @@
         pass
 
     title = tr(get_filename(title))
+    if postfix and 'vid' in kwargs:
+        title = "%s [%s]" % (title, kwargs['vid'])
     output_filename = get_output_filename(urls, title, ext, output_dir, merge)
     output_filepath = os.path.join(output_dir, output_filename)
 
@@ -1339,7 +1365,13 @@
     if re.match(r'https?://', url) is None:
         url = 'http://' + url
 
-    if playlist:
+    if m3u8:
+        if output_filename:
+            title = output_filename
+        else:
+            title = "m3u8file"
+        download_url_ffmpeg(url=url, title=title, ext='mp4', output_dir='.')
+    elif playlist:
         download_playlist(url, **kwargs)
     else:
         download(url, **kwargs)
@@ -1443,7 +1475,6 @@
             proxy_info = proxy.split("@")
             socks_proxy_addrs = proxy_info[1].split(':')
             socks_proxy_auth = proxy_info[0].split(":")
-            print(socks_proxy_auth[0]+" "+socks_proxy_auth[1]+" "+socks_proxy_addrs[0]+" "+socks_proxy_addrs[1])
             socks.set_default_proxy(
                 socks.SOCKS5,
                 socks_proxy_addrs[0],
@@ -1454,7 +1485,6 @@
            )
         else:
             socks_proxy_addrs = proxy.split(':')
-            print(socks_proxy_addrs[0]+" "+socks_proxy_addrs[1])
             socks.set_default_proxy(
                 socks.SOCKS5,
                 socks_proxy_addrs[0],
@@ -1528,6 +1558,10 @@
         help='Do not download captions (subtitles, lyrics, danmaku, ...)'
    )
    download_grp.add_argument(
+        '--postfix', action='store_true', default=False,
+        help='Postfix downloaded files with unique identifiers'
+    )
+    download_grp.add_argument(
         '-f', '--force', action='store_true', default=False,
         help='Force overwriting existing files'
    )
@@ -1619,6 +1653,10 @@
     download_grp.add_argument('--stream', help=argparse.SUPPRESS)
     download_grp.add_argument('--itag', help=argparse.SUPPRESS)
 
+    download_grp.add_argument('-m', '--m3u8', action='store_true', default=False,
+                              help='download video using an m3u8 url')
+
+
     parser.add_argument('URL', nargs='*',
                         help=argparse.SUPPRESS)
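The new `insecure` path above builds an unverified SSL context and threads it into `urllib.request.urlopen()`. A minimal standalone sketch of that same pattern, assuming nothing beyond the standard library (the target URL in the comment is only an example):

```
import ssl
from urllib import request

def fetch_insecure(url):
    # Build a default context, then disable certificate checks --
    # the same steps the --insecure code path takes in the hunk above.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    with request.urlopen(url, context=ctx) as resp:
        return resp.read()

# e.g. fetch_insecure('https://self-signed.badssl.com/') would succeed
# despite the self-signed certificate.
```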
 
     args = parser.parse_args()
 
@@ -1644,6 +1682,8 @@
     global output_filename
     global auto_rename
     global insecure
+    global m3u8
+    global postfix
     output_filename = args.output_filename
     extractor_proxy = args.extractor_proxy
 
@@ -1665,6 +1705,9 @@
     if args.cookies:
         load_cookies(args.cookies)
 
+    if args.m3u8:
+        m3u8 = True
+
     caption = True
     stream_id = args.format or args.stream or args.itag
     if args.no_caption:
@@ -1677,6 +1720,7 @@
         # ignore ssl
         insecure = True
 
+    postfix = args.postfix
 
     if args.no_proxy:
         set_http_proxy('')
@@ -1763,20 +1807,10 @@
     url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
     page = get_content(url, headers=fake_headers)
     videos = re.findall(
-        r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page
+        r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page
    )
-    vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page)
-    durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
-    print('Google Videos search:')
-    for v in zip(videos, durs):
-        print('- video: {} [{}]'.format(
-            unescape_html(v[0][1]),
-            v[1] if v[1] else '?'
-        ))
-        print('# you-get %s' % log.sprint(v[0][0], log.UNDERLINE))
-        print()
     print('Best matched result:')
-    return(videos[0][0])
+    return(videos[0])
 
 
 def url_to_module(url):

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractor.py new/you-get-0.4.1602/src/you_get/extractor.py
--- old/you-get-0.4.1555/src/you_get/extractor.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractor.py	2022-04-22 23:44:08.000000000 +0200
@@ -238,7 +238,8 @@
             download_urls(urls, self.title, ext, total_size, headers=headers,
                           output_dir=kwargs['output_dir'],
                           merge=kwargs['merge'],
-                          av=stream_id in self.dash_streams)
+                          av=stream_id in self.dash_streams,
+                          vid=self.vid)
 
         if 'caption' not in kwargs or not kwargs['caption']:
             print('Skipping captions or danmaku.')
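The two hunks above cooperate: extractor.py now forwards `vid` into `download_urls()`, and common.py appends it to the filename stem when `--postfix` is given. A small sketch of the resulting naming (the helper name and sample id are illustrative, not from the package):

```
def postfix_title(title, vid=None, postfix=True):
    # Mirrors the new common.py logic: append the unique video id
    # to the title when --postfix is set and a vid was passed along.
    if postfix and vid is not None:
        title = "%s [%s]" % (title, vid)
    return title

print(postfix_title("My Video", vid="BV1sL4y177sC"))  # -> My Video [BV1sL4y177sC]
```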
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/bilibili.py new/you-get-0.4.1602/src/you_get/extractors/bilibili.py
--- old/you-get-0.4.1555/src/you_get/extractors/bilibili.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/bilibili.py	2022-04-22 23:44:08.000000000 +0200
@@ -12,8 +12,12 @@
 
     # Bilibili media encoding options, in descending quality order.
     stream_types = [
-        {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280,
-         'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'},
+        {'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '4320p', 'desc': '超高清 8K'},
+        {'id': 'hdflv2_dolby', 'quality': 126, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '3840p', 'desc': '杜比视界'},
+        {'id': 'hdflv2_hdr', 'quality': 125, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '2160p', 'desc': '真彩 HDR'},
         {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
          'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
         {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
@@ -113,11 +117,15 @@
         return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
 
     @staticmethod
+    def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
+        return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
+
+    @staticmethod
     def bilibili_space_favlist_api(fid, pn=1, ps=20):
         return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
 
     @staticmethod
-    def bilibili_space_video_api(mid, pn=1, ps=100):
+    def bilibili_space_video_api(mid, pn=1, ps=50):
         return "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%s&ps=%s&tid=0&keyword=&order=pubdate&jsonp=jsonp" % (mid, pn, ps)
 
     @staticmethod
@@ -137,6 +145,8 @@
 
     def prepare(self, **kwargs):
         self.stream_qualities = {s['quality']: s for s in self.stream_types}
+        self.streams.clear()
+        self.dash_streams.clear()
 
         try:
             html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
@@ -167,6 +177,11 @@
             self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
             html_content = get_content(self.url, headers=self.bilibili_headers())
 
+        # redirect: festival
+        elif re.match(r'https?://(www\.)?bilibili\.com/festival/(.+)', self.url):
+            self.url = 'https://www.bilibili.com/video/%s' % match1(self.url, r'bvid=([^&]+)')
+            html_content = get_content(self.url, headers=self.bilibili_headers())
+
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
             sort = 'audio'
@@ -178,7 +193,7 @@
             sort = 'live'
         elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
             sort = 'vc'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(bv(\S+))|(BV(\S+)))', self.url):
            sort = 'video'
         elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
             sort = 'h'
@@ -193,30 +208,43 @@
         playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>')  # FIXME
         playinfo = json.loads(playinfo_text) if playinfo_text else None
-        playinfo = playinfo if playinfo['code'] == 0 else None
+        playinfo = playinfo if playinfo and playinfo.get('code') == 0 else None
 
         html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
         playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>')  # FIXME
         playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
-        playinfo_ = playinfo_ if playinfo_['code'] == 0 else None
+        playinfo_ = playinfo_ if playinfo_ and playinfo_.get('code') == 0 else None
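Because `stream_types` above is ordered by descending quality, `prepare()` can index it by the numeric `quality` field with a one-line dict comprehension. A tiny sketch of that lookup, using an abbreviated table rather than the full one:

```
stream_types = [
    {'id': 'hdflv2_8k', 'quality': 127, 'desc': '超高清 8K'},
    {'id': 'hdflv2_dolby', 'quality': 126, 'desc': '杜比视界'},
    {'id': 'hdflv2_hdr', 'quality': 125, 'desc': '真彩 HDR'},
]

# same comprehension as in prepare(): numeric quality -> stream entry
stream_qualities = {s['quality']: s for s in stream_types}
print(stream_qualities[126]['id'])  # -> hdflv2_dolby
```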
 
-        # warn if it is a multi-part video
-        pn = initial_state['videoData']['videos']
-        if pn > 1 and not kwargs.get('playlist'):
-            log.w('This is a multipart video. (use --playlist to download all parts.)')
+        if 'videoData' in initial_state:
+            # (standard video)
 
-        # set video title
-        self.title = initial_state['videoData']['title']
-        # refine title for a specific part, if it is a multi-part video
-        p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
-                '1')  # use URL to decide p-number, not initial_state['p']
-        if pn > 1:
-            part = initial_state['videoData']['pages'][p - 1]['part']
-            self.title = '%s (P%s. %s)' % (self.title, p, part)
+            # warn if it is a multi-part video
+            pn = initial_state['videoData']['videos']
+            if pn > 1 and not kwargs.get('playlist'):
+                log.w('This is a multipart video. (use --playlist to download all parts.)')
+
+            # set video title
+            self.title = initial_state['videoData']['title']
+            # refine title for a specific part, if it is a multi-part video
+            p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
+                    '1')  # use URL to decide p-number, not initial_state['p']
+            if pn > 1:
+                part = initial_state['videoData']['pages'][p - 1]['part']
+                self.title = '%s (P%s. %s)' % (self.title, p, part)
+
+            # construct playinfos
+            avid = initial_state['aid']
+            cid = initial_state['videoData']['pages'][p - 1]['cid']  # use p-number, not initial_state['videoData']['cid']
+        else:
+            # (festival video)
+
+            # set video title
+            self.title = initial_state['videoInfo']['title']
+
+            # construct playinfos
+            avid = initial_state['videoInfo']['aid']
+            cid = initial_state['videoInfo']['cid']
 
-        # construct playinfos
-        avid = initial_state['aid']
-        cid = initial_state['videoData']['pages'][p - 1]['cid']  # use p-number, not initial_state['videoData']['cid']
         current_quality, best_quality = None, None
         if playinfo is not None:
             current_quality = playinfo['data']['quality'] or None  # 0 indicates an error, fallback to None
@@ -594,10 +622,12 @@
         elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
                 re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
             sort = 'bangumi_md'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|bv(\S+)|BV(\S+))', self.url):
             sort = 'video'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
             sort = 'space_channel'
+        elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url):
+            sort = 'space_channel_series'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url):
             sort = 'space_favlist'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/video', self.url):
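The new `space_channel_series` branch (shown in the following hunk) pages through the series-archives endpoint added earlier in this diff. A hedged sketch of that flow, assuming only the field names visible in the diff (`data.archives[].aid`); the real code additionally sends bilibili referer headers, which a bare `urlopen` omits:

```
import json
from urllib.request import urlopen

def series_video_urls(mid, sid, pn=1, ps=100):
    # URL construction follows bilibili_series_archives_api() above;
    # production use would pass the same Referer header as you-get does.
    api = ('https://api.bilibili.com/x/series/archives'
           '?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp'
           % (mid, sid, pn, ps))
    data = json.loads(urlopen(api).read().decode('utf-8'))
    return ['https://www.bilibili.com/video/av%s' % v['aid']
            for v in data['data']['archives']]
```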
@@ -707,6 +737,20 @@
                 i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                 url = 'https://www.bilibili.com/video/av%s' % video['aid']
                 self.__class__().download_playlist_by_url(url, **kwargs)
+
+        elif sort == 'space_channel_series':
+            m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
+            mid, sid = m.group(1), m.group(2)
+            api_url = self.bilibili_series_archives_api(mid, sid)
+            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+            archives_info = json.loads(api_content)
+            # TBD: channel of more than 100 videos
+
+            epn, i = len(archives_info['data']['archives']), 0
+            for video in archives_info['data']['archives']:
+                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
+                url = 'https://www.bilibili.com/video/av%s' % video['aid']
+                self.__class__().download_playlist_by_url(url, **kwargs)
 
         elif sort == 'space_favlist':
             m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url)

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/imgur.py new/you-get-0.4.1602/src/you_get/extractors/imgur.py
--- old/you-get-0.4.1555/src/you_get/extractors/imgur.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/imgur.py	2022-04-22 23:44:08.000000000 +0200
@@ -52,7 +52,8 @@
         else:
             # gallery image
             content = get_content(self.url)
-            url = match1(content, r'(https?://i.imgur.com/[^"]+)')
+            url = match1(content, r'meta property="og:video"[^>]+(https?://i.imgur.com/[^"?]+)') or \
+                match1(content, r'meta property="og:image"[^>]+(https?://i.imgur.com/[^"?]+)')
             _, container, size = url_info(url)
             self.streams = {
                 'original': {

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/instagram.py new/you-get-0.4.1602/src/you_get/extractors/instagram.py
--- old/you-get-0.4.1555/src/you_get/extractors/instagram.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/instagram.py	2022-04-22 23:44:08.000000000 +0200
@@ -25,7 +25,7 @@
     try:
         info = json.loads(data.group(1))
         post = info['entry_data']['PostPage'][0]
-        assert post
+        assert post['items']
     except:
         # with logged-in cookies
         data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
@@ -33,13 +33,12 @@
             log.e('[Warning] Cookies needed.')
         post = json.loads(data.group(1))
 
-    if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']:
-        edges = post['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
-        for edge in edges:
-            title = edge['node']['shortcode']
-            image_url = edge['node']['display_url']
-            if 'video_url' in edge['node']:
-                image_url = edge['node']['video_url']
+    for item in post['items']:
+        code = item['code']
+        carousel_media = item.get('carousel_media') or [item]
+        for i, media in enumerate(carousel_media):
+            title = '%s [%s]' % (code, i)
+            image_url = media['image_versions2']['candidates'][0]['url']
             ext = image_url.split('?')[0].split('.')[-1]
             size = int(get_head(image_url)['Content-Length'])
 
@@ -50,21 +49,20 @@
                           ext=ext,
                           total_size=size,
                           output_dir=output_dir)
 
-    else:
-        title = post['graphql']['shortcode_media']['shortcode']
-        image_url = post['graphql']['shortcode_media']['display_url']
-        if 'video_url' in post['graphql']['shortcode_media']:
-            image_url = post['graphql']['shortcode_media']['video_url']
-        ext = image_url.split('?')[0].split('.')[-1]
-        size = int(get_head(image_url)['Content-Length'])
-        print_info(site_info, title, ext, size)
-        if not info_only:
-            download_urls(urls=[image_url],
-                          title=title,
-                          ext=ext,
-                          total_size=size,
-                          output_dir=output_dir)
+            # download videos (if any)
+            if 'video_versions' in media:
+                video_url = media['video_versions'][0]['url']
+                ext = video_url.split('?')[0].split('.')[-1]
+                size = int(get_head(video_url)['Content-Length'])
+
+                print_info(site_info, title, ext, size)
+                if not info_only:
+                    download_urls(urls=[video_url],
+                                  title=title,
+                                  ext=ext,
+                                  total_size=size,
+                                  output_dir=output_dir)
 
 site_info = "Instagram.com"
 download = instagram_download
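The imgur change prefers the `og:video` meta tag and falls back to `og:image`, stopping before any query string. A self-contained sketch of that fallback chain; the helper stands in for you-get's `match1()` and the sample HTML is made up:

```
import re

def first_match(text, pattern):
    # simplified stand-in for you-get's match1()
    m = re.search(pattern, text)
    return m.group(1) if m else None

html = '<meta property="og:video" data-react-helmet="true" content="https://i.imgur.com/abc123.mp4?x=1"/>'
url = first_match(html, r'meta property="og:video"[^>]+(https?://i\.imgur\.com/[^"?]+)') or \
      first_match(html, r'meta property="og:image"[^>]+(https?://i\.imgur\.com/[^"?]+)')
print(url)  # -> https://i.imgur.com/abc123.mp4 (query string stripped)
```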
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/ixigua.py new/you-get-0.4.1602/src/you_get/extractors/ixigua.py
--- old/you-get-0.4.1555/src/you_get/extractors/ixigua.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/ixigua.py	2022-04-22 23:44:08.000000000 +0200
@@ -18,121 +18,95 @@
 }
 
 
-def int_overflow(val):
-    maxint = 2147483647
-    if not -maxint - 1 <= val <= maxint:
-        val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
-    return val
-
-
-def unsigned_right_shitf(n, i):
-    if n < 0:
-        n = ctypes.c_uint32(n).value
-    if i < 0:
-        return -int_overflow(n << abs(i))
-    return int_overflow(n >> i)
-
-
-def get_video_url_from_video_id(video_id):
-    """Splicing URLs according to video ID to get video details"""
-    # from js
-    data = [""] * 256
-    for index, _ in enumerate(data):
-        t = index
-        for i in range(8):
-            t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)
-        data[index] = t
-
-    def tmp():
-        rand_num = random.random()
-        path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id,
-                                                                              random_num=str(rand_num)[2:])
-        e = o = r = -1
-        i, a = 0, len(path)
-        while i < a:
-            e = ord(path[i])
-            i += 1
-            if e < 128:
-                r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]
-            else:
-                if e < 2048:
-                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]
-                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
-                else:
-                    if 55296 <= e < 57344:
-                        e = (1023 & e) + 64
-                        i += 1
-                        o = 1023 & t.url(i)
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]
-                    else:
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]
-                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
-
-        return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))
-
-    while 1:
-        url = tmp()
-        if url.split("=")[-1][0] != "-":  # retry until the s parameter is non-negative
-            return url
-
-
-def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs):
     # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
-    resp = urlopen_with_retry(request.Request(url))
+    headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \
+                        "ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \
+                        "__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \
+                        "ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1; "
+
+    resp = urlopen_with_retry(request.Request(url, headers=headers))
     html = resp.read().decode('utf-8')
 
     _cookies = []
     for c in resp.getheader('Set-Cookie').split("httponly,"):
         _cookies.append(c.strip().split(' ')[0])
-    headers['cookie'] = ' '.join(_cookies)
+    headers['cookie'] += ' '.join(_cookies)
 
-    conf = loads(match1(html, r"window\.config = (.+);"))
-    if not conf:
-        log.e("Get window.config from url failed, url: {}".format(url))
-        return
-    verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
-        + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
-    try:
-        ok = get_content(verify_url)
-    except Exception as e:
-        ok = e.msg
-    if ok != 'OK':
-        log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
-        return
-    html = get_content(url, headers=headers)
+    match_txt = match1(html, r"<script id=\"SSR_HYDRATED_DATA\">window._SSR_HYDRATED_DATA=(.*?)<\/script>")
+    if not match_txt:
+        log.e("Get video info from url failed, url: {}".format(url))
+        return
+    video_info = loads(match_txt.replace('":undefined', '":null'))
+    if not video_info:
+        log.e("video_info not found, url:{}".format(url))
+        return
+
+    title = video_info['anyVideo']['gidInformation']['packerData']['video']['title']
+    video_resource = video_info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
+    if video_resource.get('dash', None):
+        video_list = video_resource['dash']
+    elif video_resource.get('dash_120fps', None):
+        video_list = video_resource['dash_120fps']
+    elif video_resource.get('normal', None):
+        video_list = video_resource['normal']
+    else:
+        log.e("video_list not found, url:{}".format(url))
+        return
+
+    streams = [
+        # {'file_id': 'fc1b9bf8e8e04a849d90a5172d3f6919', 'quality': "normal", 'size': 0,
+        #  'definition': '720p', 'video_url': '', 'audio_url': '', 'v_type': 'dash'},
+    ]
+    # if a dash stream exists, use its separate video/audio tracks first;
+    # otherwise fall back to the plain mp4
+    if video_list.get('dynamic_video', None):
+        audio_url = base64.b64decode(
+            video_list['dynamic_video']['dynamic_audio_list'][0]['main_url'].encode("utf-8")).decode("utf-8")
+        dynamic_video_list = video_list['dynamic_video']['dynamic_video_list']
+        streams = convertStreams(dynamic_video_list, audio_url)
+    elif video_list.get('video_list', None):
+        dynamic_video_list = video_list['video_list']
+        streams = convertStreams(dynamic_video_list, "")
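The rewritten extractor stores every stream URL base64-encoded in `main_url`. A minimal sketch of the decode step used by `convertStreams()`; the encoded value below is a made-up example:

```
import base64

def decode_main_url(main_url):
    # ixigua delivers video/audio URLs base64-encoded in 'main_url'
    return base64.b64decode(main_url.encode("utf-8")).decode("utf-8")

encoded = base64.b64encode(b"https://example.com/video.mp4").decode("utf-8")
print(decode_main_url(encoded))  # -> https://example.com/video.mp4
```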
+    print("title: %s" % title)
+    for stream in streams:
+        if stream_id != "" and stream_id != stream['definition']:
+            continue
+
+        print("    - format:  %s" % stream['definition'])
+        print("      size:    %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
+        print("      quality: %s " % stream['quality'])
+        print("      v_type:  %s " % stream['v_type'])
+        # print("      video_url: %s " % stream['video_url'])
+        # print("      audio_url: %s " % stream['audio_url'])
+        print()
+
+        # download only the first matching stream
+        if not info_only:
+            urls = [stream['video_url']]
+            if stream['audio_url'] != "":
+                urls.append(stream['audio_url'])
+                kwargs['av'] = 'av'  # audio and video are separate tracks to merge
+
+            download_urls(urls, title, "mp4", stream['size'], output_dir, merge=merge, headers=headers,
+                          **kwargs)
+            return
+
+
+def convertStreams(video_list, audio_url):
+    streams = []
+    for dynamic_video in video_list:
+        streams.append({
+            'file_id': dynamic_video['file_hash'],
+            'quality': dynamic_video['quality'],
+            'size': dynamic_video['size'],
+            'definition': dynamic_video['definition'],
+            'video_url': base64.b64decode(dynamic_video['main_url'].encode("utf-8")).decode("utf-8"),
+            'audio_url': audio_url,
+            'v_type': dynamic_video['vtype'],
+        })
 
-    video_id = match1(html, r"\"vid\":\"([^\"]+)")
-    title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
-    if not video_id:
-        log.e("video_id not found, url:{}".format(url))
-        return
-    video_info_url = get_video_url_from_video_id(video_id)
-    video_info = loads(get_content(video_info_url))
-    if video_info.get("code", 1) != 0:
-        log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1)))
-        return
-    if not video_info.get("data", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data or data is empty".format(video_info_url))
-        return
-    if not video_info["data"].get("video_list", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data.video_list or data.video_list is empty".format(video_info_url))
-        return
-    if not video_info["data"]["video_list"].get("video_1", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url))
-        return
-    bestQualityVideo = list(video_info["data"]["video_list"].keys())[-1]  # There is not only video_1, there might be video_2
-    size = int(video_info["data"]["video_list"][bestQualityVideo]["size"])
-    print_info(site_info=site_info, title=title, type="mp4", size=size)  # the videos are all mp4
-    if not info_only:
-        video_url = base64.b64decode(video_info["data"]["video_list"][bestQualityVideo]["main_url"].encode("utf-8"))
-        download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs)
+    return streams
 
 
 def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/tiktok.py new/you-get-0.4.1602/src/you_get/extractors/tiktok.py
--- old/you-get-0.4.1555/src/you_get/extractors/tiktok.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/tiktok.py	2022-04-22 23:44:08.000000000 +0200
@@ -5,42 +5,37 @@
 from ..common import *
 
 def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    referUrl = url.split('?')[0]
-    headers = fake_headers
-
-    # trick or treat
-    html = get_content(url, headers=headers)
-    data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
-    info = json.loads(data)
-    wid = info['props']['initialProps']['$wid']
-    cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
-
-    # here's the cookie
-    headers['Cookie'] = cookie
-
-    # try again
-    html = get_content(url, headers=headers)
-    data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
+    while True:
+        m = re.match('https://([^/]+)(/.*)', url)
+        host = m.group(1)
+        if host == 'www.tiktok.com':  # canonical URL reached
+            url = m.group(2).split('?')[0]
+            vid = url.split('/')[3]  # should be a string of numbers
+            break
+        else:
+            url = get_location(url)
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
+        'Accept-Encoding': 'gzip, deflate',
+        'Accept': '*/*',
+        'Connection': 'keep-alive'  # important
+    }
+
+    html = getHttps(host, url, headers=headers)
+    data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
+        r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
     info = json.loads(data)
-    wid = info['props']['initialProps']['$wid']
-    cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
-
-    videoData = info['props']['pageProps']['itemInfo']['itemStruct']
-    videoId = videoData['id']
-    videoUrl = videoData['video']['downloadAddr']
-    uniqueId = videoData['author'].get('uniqueId')
-    nickName = videoData['author'].get('nickname')
-
-    title = '%s [%s]' % (nickName or uniqueId, videoId)
-
-    # we also need the referer
-    headers['Referer'] = referUrl
+    downloadAddr = info['ItemModule'][vid]['video']['downloadAddr']
+    author = info['ItemModule'][vid]['author']  # same as uniqueId
+    nickname = info['UserModule']['users'][author]['nickname']
+    title = '%s [%s]' % (nickname or author, vid)
 
-    mime, ext, size = url_info(videoUrl, headers=headers)
+    mime, ext, size = url_info(downloadAddr, headers=headers)
     print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
+        download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
 
 site_info = "TikTok.com"
 download = tiktok_download
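The rewritten TikTok extractor first normalizes any short link by following redirects until the host is www.tiktok.com. A hedged standalone sketch of that loop using only urllib; you-get's own `get_location()` does the redirect resolution internally, and the function below is an assumed equivalent, not the package's code:

```
import re
from urllib import request

def canonicalize(url):
    # Loop until the canonical host is reached, as tiktok_download() does.
    while True:
        host, path = re.match(r'https://([^/]+)(/.*)', url).groups()
        if host == 'www.tiktok.com':
            return host, path.split('?')[0]
        # urlopen follows redirects; .geturl() yields the landing URL
        url = request.urlopen(url).geturl()

# e.g. canonicalize('https://vt.tiktok.com/UGJR4R/') would resolve the
# short link to ('www.tiktok.com', '/@user/video/<numeric id>').
```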
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/twitter.py new/you-get-0.4.1602/src/you_get/extractors/twitter.py
--- old/you-get-0.4.1555/src/you_get/extractors/twitter.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/twitter.py	2022-04-22 23:44:08.000000000 +0200
@@ -51,7 +51,12 @@
         api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
         info = json.loads(api_content)
 
-        if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
+        if item_id not in info['globalObjects']['tweets']:
+            # something wrong here
+            log.w(info['timeline']['instructions'][0]['addEntries']['entries'][0]['content']['item']['content']['tombstone']['tombstoneInfo']['richText']['text'])
+            return
+
+        elif 'extended_entities' in info['globalObjects']['tweets'][item_id]:
             # if the tweet contains media, download them
             media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/youtube.py new/you-get-0.4.1602/src/you_get/extractors/youtube.py
--- old/you-get-0.4.1555/src/you_get/extractors/youtube.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/youtube.py	2022-04-22 23:44:08.000000000 +0200
@@ -78,6 +78,7 @@
         # - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js
         # - https://www.youtube.com/s/player/0b643cd1/player_ias.vflset/sv_SE/base.js
         # - https://www.youtube.com/s/player/50e823fc/player_ias.vflset/sv_SE/base.js
+        # - https://www.youtube.com/s/player/3b5d5649/player_ias.vflset/sv_SE/base.js
         def tr_js(code):
             code = re.sub(r'function', r'def', code)
             # add prefix '_sig_' to prevent namespace pollution
@@ -117,7 +118,9 @@
             f2 = re.sub(r'\$', '_dollar', f2)
             code = code + 'global _sig_%s\n' % f2 + tr_js(f2def)
 
-        f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1)
+        # if f1 contains more than 2 characters, no need to do substitution
+        # FIXME: we probably shouldn't do any substitution here at all?
+        f1 = re.sub(r'^(as|if|in|is|or)$', r'_\1', f1)
         f1 = re.sub(r'\$', '_dollar', f1)
         code = code + '_sig=_sig_%s(s)' % f1
         exec(code, globals(), locals())
@@ -141,6 +144,7 @@
         """
         return match1(url, r'youtu\.be/([^?/]+)') or \
             match1(url, r'youtube\.com/embed/([^/?]+)') or \
+            match1(url, r'youtube\.com/shorts/([^/?]+)') or \
             match1(url, r'youtube\.com/v/([^/?]+)') or \
             match1(url, r'youtube\.com/watch/([^/?]+)') or \
             parse_query_param(url, 'v') or \
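The YouTube change adds `/shorts/` URLs to the video-id fallback chain. A small sketch of that pattern; the helper below mimics you-get's `match1()` and the chain is abbreviated to the URL forms shown in the hunk:

```
import re

def match1(text, pattern):
    m = re.search(pattern, text)
    return m.group(1) if m else None

def get_vid_from_url(url):
    # ordered fallbacks, now including youtube.com/shorts/<id>
    return match1(url, r'youtu\.be/([^?/]+)') or \
        match1(url, r'youtube\.com/embed/([^/?]+)') or \
        match1(url, r'youtube\.com/shorts/([^/?]+)') or \
        match1(url, r'youtube\.com/watch/([^/?]+)')

print(get_vid_from_url('https://www.youtube.com/shorts/abc123XYZ'))  # -> abc123XYZ
```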
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/extractors/zhihu.py new/you-get-0.4.1602/src/you_get/extractors/zhihu.py
--- old/you-get-0.4.1555/src/you_get/extractors/zhihu.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/extractors/zhihu.py	2022-04-22 23:44:08.000000000 +0200
@@ -31,8 +31,8 @@
         play_list = video_info["playlist"]
 
         # first High Definition
-        # second Second Standard Definition
-        # third ld. What is ld ?
+        # second Standard Definition
+        # third Low Definition
         # finally continue
         data = play_list.get("hd", play_list.get("sd", play_list.get("ld", None)))
         if not data:

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/processor/ffmpeg.py new/you-get-0.4.1602/src/you_get/processor/ffmpeg.py
--- old/you-get-0.4.1555/src/you_get/processor/ffmpeg.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/processor/ffmpeg.py	2022-04-22 23:44:08.000000000 +0200
@@ -93,7 +93,7 @@
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy']
         params.extend(['--', output])
         if subprocess.call(params, stdin=STDIN) == 0:
@@ -149,7 +149,7 @@
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy',
                   '-bsf:a', 'aac_adtstoasc']
         params.extend(['--', output])
@@ -203,7 +203,7 @@
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy',
                   '-bsf:a', 'aac_adtstoasc']
         params.extend(['--', output])

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/src/you_get/version.py new/you-get-0.4.1602/src/you_get/version.py
--- old/you-get-0.4.1555/src/you_get/version.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/src/you_get/version.py	2022-04-22 23:44:08.000000000 +0200
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1555'
+__version__ = '0.4.1602'
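The ffmpeg.py change replaces `-safe -1` with `-safe 0`: the concat demuxer's `-safe` option accepts 0 (allow "unsafe" paths, e.g. absolute ones in the list file) or 1, so `-1` was invalid on newer FFmpeg builds. A sketch of the resulting invocation, assuming ffmpeg is on PATH; `concat.txt` and `out.mp4` are placeholder names (you-get generates the list via `generate_concat_list()`):

```
import subprocess

# Equivalent of the patched params construction in ffmpeg.py.
params = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
          '-i', 'concat.txt', '-c', 'copy', '--', 'out.mp4']
subprocess.call(params)
```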
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/tests/test.py new/you-get-0.4.1602/tests/test.py
--- old/you-get-0.4.1555/tests/test.py	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/tests/test.py	2022-04-22 23:44:08.000000000 +0200
@@ -29,10 +29,10 @@
             'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
        )
         youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True)
-        youtube.download(
-            'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare',  # noqa
-            info_only=True
-        )
+        #youtube.download(
+        #    'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare',  # noqa
+        #    info_only=True
+        #)
         #youtube.download(
         #    'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
         #)
@@ -40,6 +40,9 @@
     def test_acfun(self):
         acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
 
+    #def test_bilibili(self):
+    #    bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True)
+
     #def test_soundcloud(self):
         ## single song
         #soundcloud.download(
@@ -50,10 +53,9 @@
         #    'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True
         #)
 
-    #def tests_tiktok(self):
-    #    tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True)
-    #    tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True)
-    #    tiktok.download('https://vt.tiktok.com/UGJR4R/', info_only=True)
+    def test_tiktok(self):
+        tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True)
+        tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True)
 
 
 if __name__ == '__main__':

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1555/you-get.json new/you-get-0.4.1602/you-get.json
--- old/you-get-0.4.1555/you-get.json	2021-11-03 18:32:35.000000000 +0100
+++ new/you-get-0.4.1602/you-get.json	2022-04-22 23:44:08.000000000 +0200
@@ -18,9 +18,6 @@
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.2",
-        "Programming Language :: Python :: 3.3",
-        "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
