Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2024-08-20 16:13:38 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.2698 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Tue Aug 20 16:13:38 2024 rev:50 rq:1194773 version:0.4.1730 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2024-07-15 19:49:33.524303251 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new.2698/you-get.changes 2024-08-20 16:14:08.460020028 +0200 @@ -1,0 +2,16 @@ +Mon Aug 19 20:18:30 UTC 2024 - Luigi Baldoni <aloi...@gmx.com> + +- Update to version 0.4.1730 + * [vimeo] cleanup + * [tests] disable test_youtube temporarily + * fix "SyntaxWarning: invalid escape sequence" for Python 3.12 + * [youtube] update self.ua (fix extraction) + * [youtube] add self.ua + * [tiktok] support short URLs (vt.tiktok.com) + * Add Flox as an installation option + * [instagram] fix extraction + * Add ytInitialPlayerResponse checker to let user easily know + problem + * [youtube] sloppy fix + +------------------------------------------------------------------- Old: ---- you-get-0.4.1718.tar.gz New: ---- you-get-0.4.1730.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.L2Jaa4/_old 2024-08-20 16:14:09.532064029 +0200 +++ /var/tmp/diff_new_pack.L2Jaa4/_new 2024-08-20 16:14:09.536064193 +0200 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1718 +Version: 0.4.1730 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1718.tar.gz -> you-get-0.4.1730.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/README.md new/you-get-0.4.1730/README.md --- old/you-get-0.4.1718/README.md 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/README.md 2024-08-19 21:53:12.000000000 +0200 @@ -127,6 +127,14 @@ # pkg install you-get ``` +### Option 7: Flox (Mac, Linux, and Windows WSL) + +You can install `you-get` easily via: + +``` +$ flox install you-get +``` + ### Shell completion Completion definitions for Bash, Fish and Zsh can be found in [`contrib/completion`](https://github.com/soimort/you-get/tree/develop/contrib/completion). Please consult your shell's manual for how to take advantage of them. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/common.py new/you-get-0.4.1730/src/you_get/common.py --- old/you-get-0.4.1718/src/you_get/common.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/common.py 2024-08-19 21:53:12.000000000 +0200 @@ -145,7 +145,7 @@ 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.68' # Latest Edge + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/126.0.2592.113' # Latest Edge } if sys.stdout.isatty(): @@ -715,7 +715,7 @@ bar.done() if not force and auto_rename: path, ext = os.path.basename(filepath).rsplit('.', 1) - finder = re.compile(' \([1-9]\d*?\)$') + finder = re.compile(r' \([1-9]\d*?\)$') if (finder.search(path) is None): thisfile = path + ' (1).' + ext else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/acfun.py new/you-get-0.4.1730/src/you_get/extractors/acfun.py --- old/you-get-0.4.1718/src/you_get/extractors/acfun.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/acfun.py 2024-08-19 21:53:12.000000000 +0200 @@ -32,7 +32,7 @@ self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] currentVideoInfo = json_data.get('currentVideoInfo') - elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url): + elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url): html = get_content(self.url, headers=fake_headers) tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>') json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] @@ -180,7 +180,7 @@ title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0] currentVideoInfo = json_data.get('currentVideoInfo') m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo) - elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url): + elif re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)', url): html = get_content(url, headers=fake_headers) tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>') json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/baidu.py new/you-get-0.4.1730/src/you_get/extractors/baidu.py --- old/you-get-0.4.1718/src/you_get/extractors/baidu.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/baidu.py 2024-08-19 21:53:12.000000000 +0200 @@ -116,7 +116,7 @@ id = r1(r'https?://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) - elif re.match('https?://music.baidu.com/song/\d+', url): + elif re.match(r'https?://music.baidu.com/song/\d+', url): id = r1(r'https?://music.baidu.com/song/(\d+)', url) baidu_download_song(id, output_dir, merge, info_only) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/coub.py new/you-get-0.4.1730/src/you_get/extractors/coub.py --- old/you-get-0.4.1718/src/you_get/extractors/coub.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/coub.py 2024-08-19 21:53:12.000000000 +0200 @@ -58,7 +58,7 @@ def get_title_and_urls(json_data): - title = legitimize(re.sub('[\s*]', "_", json_data['title'])) + title = legitimize(re.sub(r'[\s*]', "_", json_data['title'])) video_info = json_data['file_versions']['html5']['video'] if 'high' not in video_info: if 'med' not in video_info: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/douban.py new/you-get-0.4.1730/src/you_get/extractors/douban.py --- old/you-get-0.4.1718/src/you_get/extractors/douban.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/douban.py 2024-08-19 21:53:12.000000000 +0200 @@ -10,7 +10,7 @@ if re.match(r'https?://movie', url): title = match1(html, 'name="description" content="([^"]+)') - tid = match1(url, 'trailer/(\d+)') + tid = match1(url, r'trailer/(\d+)') real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid type, ext, size = url_info(real_url) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/embed.py new/you-get-0.4.1730/src/you_get/extractors/embed.py --- old/you-get-0.4.1718/src/you_get/extractors/embed.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/embed.py 2024-08-19 21:53:12.000000000 +0200 @@ -20,18 +20,18 @@ """ refer to http://open.youku.com/tools """ -youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', - 'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf', - 'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)', - 'player\.youku\.com/embed/([a-zA-Z0-9=]+)', - 'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'' +youku_embed_patterns = [ r'youku\.com/v_show/id_([a-zA-Z0-9=]+)', + r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf', + r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)', + r'player\.youku\.com/embed/([a-zA-Z0-9=]+)', + r'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'' ] """ http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ -tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&', - 'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf' +tudou_embed_patterns = [ r'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&', + r'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf' ] """ @@ -39,18 +39,18 @@ """ tudou_api_patterns = [ ] -iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ] +iqiyi_embed_patterns = [ r'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ] -netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] +netease_embed_patterns = [ r'(http://\w+\.163\.com/movie/[^\'"]+)' ] -vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ] +vimeo_embed_patters = [ r'player\.vimeo\.com/video/(\d+)' ] -dailymotion_embed_patterns = [ 'www\.dailymotion\.com/embed/video/(\w+)' ] +dailymotion_embed_patterns = [ r'www\.dailymotion\.com/embed/video/(\w+)' ] """ check the share button on http://www.bilibili.com/video/av5079467/ """ -bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ] +bilibili_embed_patterns = [ r'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ] ''' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/funshion.py new/you-get-0.4.1730/src/you_get/extractors/funshion.py --- old/you-get-0.4.1718/src/you_get/extractors/funshion.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/funshion.py 2024-08-19 21:53:12.000000000 +0200 @@ -84,7 +84,7 @@ moz_ec_name = search_dict(sym_to_name, 'mozEcName') push = search_dict(sym_to_name, 'push') - patt = '{}\.{}\("(.+?)"\)'.format(moz_ec_name, push) + patt = r'{}\.{}\("(.+?)"\)'.format(moz_ec_name, push) ec_list = re.findall(patt, code) [magic_list.append(sym_to_name[ec]) for ec in ec_list] return magic_list diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/instagram.py new/you-get-0.4.1730/src/you_get/extractors/instagram.py --- old/you-get-0.4.1718/src/you_get/extractors/instagram.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/instagram.py 2024-08-19 21:53:12.000000000 +0200 @@ -5,8 +5,13 @@ from ..common import * def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87', + 'sec-fetch-mode': 'navigate' # important + } + url = r1(r'([^?]*)', url) - cont = get_content(url, headers=fake_headers) + cont = get_content(url, headers=headers) vid = r1(r'instagram.com/\w+/([^/]+)', url) description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \ @@ -15,6 +20,8 @@ appId = r1(r'"appId":"(\d+)"', cont) media_id = r1(r'"media_id":"(\d+)"', cont) + logging.debug('appId: %s' % appId) + logging.debug('media_id: %s' % media_id) api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id try: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/ku6.py new/you-get-0.4.1730/src/you_get/extractors/ku6.py --- old/you-get-0.4.1718/src/you_get/extractors/ku6.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/ku6.py 2024-08-19 21:53:12.000000000 +0200 @@ -50,7 +50,7 @@ vid = vid.group(1) else: raise Exception('Unsupported url') - this_meta = re.search('"?'+vid+'"?:\{(.+?)\}', meta) + this_meta = re.search('"?'+vid+r'"?:\{(.+?)\}', meta) if this_meta is not None: this_meta = this_meta.group(1) title = re.search('title:"(.+?)"', this_meta).group(1) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/kugou.py new/you-get-0.4.1730/src/you_get/extractors/kugou.py --- old/you-get-0.4.1718/src/you_get/extractors/kugou.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/kugou.py 2024-08-19 21:53:12.000000000 +0200 @@ -32,8 +32,8 @@ def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): # sample # url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462 - hash_val = match1(url, 'hash=(\w+)') - album_id = match1(url, 'album_id=(\d+)') + hash_val = match1(url, r'hash=(\w+)') + album_id = match1(url, r'album_id=(\d+)') if not album_id: album_id = 123 html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id)) @@ -60,7 +60,7 @@ res = pattern.findall(html) for song in res: res = get_html(song) - pattern_url = re.compile('"hash":"(\w+)".*"album_id":(\d)+') + pattern_url = re.compile(r'"hash":"(\w+)".*"album_id":(\d)+') hash_val, album_id = res = pattern_url.findall(res)[0] if not album_id: album_id = 123 @@ -70,7 +70,7 @@ # album sample: http://www.kugou.com/yy/album/single/1645030.html elif url.lower().find('album') != -1: html = get_html(url) - pattern = re.compile('var data=(\[.*?\]);') + pattern = re.compile(r'var data=(\[.*?\]);') res = pattern.findall(html)[0] for v in json.loads(res): urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id'])) @@ -79,7 +79,7 @@ # playlist sample:http://www.kugou.com/yy/special/single/487279.html else: html = get_html(url) - pattern = re.compile('data="(\w+)\|(\d+)"') + pattern = re.compile(r'data="(\w+)\|(\d+)"') for v in pattern.findall(html): urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1])) print('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1])) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/kuwo.py new/you-get-0.4.1730/src/you_get/extractors/kuwo.py --- old/you-get-0.4.1718/src/you_get/extractors/kuwo.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/kuwo.py 2024-08-19 21:53:12.000000000 +0200 @@ -18,7 +18,7 @@ def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html=get_content(url) - matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated + matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated for rid in matched: kuwo_download_by_rid(rid,output_dir,merge,info_only) @@ -26,7 +26,7 @@ def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if "www.kuwo.cn/yinyue" in url: - rid=match1(url,'yinyue/(\d+)') + rid=match1(url, r'yinyue/(\d+)') kuwo_download_by_rid(rid,output_dir, merge, info_only) else: kuwo_playlist_download(url,output_dir,merge,info_only) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/mgtv.py new/you-get-0.4.1730/src/you_get/extractors/mgtv.py --- old/you-get-0.4.1718/src/you_get/extractors/mgtv.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/mgtv.py 2024-08-19 21:53:12.000000000 +0200 @@ -44,11 +44,11 @@ def get_vid_from_url(url): """Extracts video ID from URL. """ - vid = match1(url, 'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html') + vid = match1(url, r'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html') if not vid: - vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html') + vid = match1(url, r'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html') if not vid: - vid = match1(url, 'https?://www.mgtv.com/s/(\d+).html') + vid = match1(url, r'https?://www.mgtv.com/s/(\d+).html') return vid # ---------------------------------------------------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/qq.py new/you-get-0.4.1730/src/you_get/extractors/qq.py --- old/you-get-0.4.1718/src/you_get/extractors/qq.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/qq.py 2024-08-19 21:53:12.000000000 +0200 @@ -83,7 +83,7 @@ playurl = json_data['data']['playurl'] videourl = json_data['data']['playurl_video'] real_url = playurl if playurl else videourl - real_url = real_url.replace('\/', '/') + real_url = real_url.replace(r'\/', '/') ksong_mid = json_data['data']['ksong_mid'] lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/sohu.py new/you-get-0.4.1730/src/you_get/extractors/sohu.py --- old/you-get-0.4.1718/src/you_get/extractors/sohu.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/sohu.py 2024-08-19 21:53:12.000000000 +0200 @@ -23,7 +23,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs): if re.match(r'http://share.vrs.sohu.com', url): - vid = r1('id=(\d+)', url) + vid = r1(r'id=(\d+)', url) else: html = get_html(url) vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/tiktok.py new/you-get-0.4.1730/src/you_get/extractors/tiktok.py --- old/you-get-0.4.1718/src/you_get/extractors/tiktok.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/tiktok.py 2024-08-19 21:53:12.000000000 +0200 @@ -16,6 +16,8 @@ m = re.match('(https?://)?([^/]+)(/.*)', url) host = m.group(2) if host != 'www.tiktok.com': # non-canonical URL + if host == 'vt.tiktok.com': # short URL + url = get_location(url) vid = r1(r'/video/(\d+)', url) url = 'https://www.tiktok.com/@/video/%s/' % vid host = 'www.tiktok.com' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/tudou.py new/you-get-0.4.1730/src/you_get/extractors/tudou.py --- old/you-get-0.4.1718/src/you_get/extractors/tudou.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/tudou.py 2024-08-19 21:53:12.000000000 +0200 @@ -71,7 +71,7 @@ # obsolete? def parse_playlist(url): - aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) + aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) html = get_decoded_html(url) if not aid: aid = r1(r"aid\s*[:=]\s*'(\d+)'", html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/tumblr.py new/you-get-0.4.1730/src/you_get/extractors/tumblr.py --- old/you-get-0.4.1718/src/you_get/extractors/tumblr.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/tumblr.py 2024-08-19 21:53:12.000000000 +0200 @@ -34,7 +34,7 @@ post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url) page = get_html(url, faker=True) - html = parse.unquote(page).replace('\/', '/') + html = parse.unquote(page).replace(r'\/', '/') feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html) if feed in ['photo', 'photoset', 'entry'] or feed is None: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/twitter.py new/you-get-0.4.1730/src/you_get/extractors/twitter.py --- old/you-get-0.4.1718/src/you_get/extractors/twitter.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/twitter.py 2024-08-19 21:53:12.000000000 +0200 @@ -33,7 +33,7 @@ **kwargs) return - m = re.match('^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url) + m = re.match(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url) assert m screen_name, item_id = m.group(3), m.group(4) page_title = "{} [{}]".format(screen_name, item_id) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/universal.py new/you-get-0.4.1730/src/you_get/extractors/universal.py --- old/you-get-0.4.1718/src/you_get/extractors/universal.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/universal.py 2024-08-19 21:53:12.000000000 +0200 @@ -48,7 +48,7 @@ else: return - hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' + + hls_urls = re.findall(r'(https?://[^;"\'\\]+' + r'\.m3u8?' + r'[^;"\'\\]*)', page) if hls_urls: try: @@ -64,14 +64,14 @@ return # most common media file extensions on the Internet - media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm', - '[-_]1\d\d\d\.jpe?g', '[-_][6-9]\d\d\.jpe?g', # tumblr - '[-_]1\d\d\dx[6-9]\d\d\.jpe?g', - '[-_][6-9]\d\dx1\d\d\d\.jpe?g', - '[-_][6-9]\d\dx[6-9]\d\d\.jpe?g', - 's1600/[\w%]+\.jpe?g', # blogger - 'blogger\.googleusercontent\.com/img/a/\w*', # blogger - 'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon? + media_exts = [r'\.flv', r'\.mp3', r'\.mp4', r'\.webm', + r'[-_]1\d\d\d\.jpe?g', r'[-_][6-9]\d\d\.jpe?g', # tumblr + r'[-_]1\d\d\dx[6-9]\d\d\.jpe?g', + r'[-_][6-9]\d\dx1\d\d\d\.jpe?g', + r'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g', + r's1600/[\w%]+\.jpe?g', # blogger + r'blogger\.googleusercontent\.com/img/a/\w*', # blogger + r'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon? ] urls = [] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/vimeo.py new/you-get-0.4.1730/src/you_get/extractors/vimeo.py --- old/you-get-0.4.1718/src/you_get/extractors/vimeo.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/vimeo.py 2024-08-19 21:53:12.000000000 +0200 @@ -102,7 +102,7 @@ pos = 0 while pos < len(lines): if lines[pos].startswith('#EXT-X-STREAM-INF'): - patt = 'RESOLUTION=(\d+)x(\d+)' + patt = r'RESOLUTION=(\d+)x(\d+)' hit = re.search(patt, lines[pos]) if hit is None: continue @@ -132,34 +132,6 @@ def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): - ''' - try: - # normal Vimeo video - html = get_content('https://vimeo.com/' + id) - cfg_patt = r'clip_page_config\s*=\s*(\{.+?\});' - cfg = json.loads(match1(html, cfg_patt)) - video_page = get_content(cfg['player']['config_url'], headers=fake_headers) - title = cfg['clip']['title'] - info = loads(video_page) - except: - # embedded player - referer may be required - if 'referer' in kwargs: - fake_headers['Referer'] = kwargs['referer'] - - video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) - title = r1(r'<title>([^<]+)</title>', video_page) - info = loads(match1(video_page, r'var t=(\{.+?\});')) - - streams = info['request']['files']['progressive'] - streams = sorted(streams, key=lambda i: i['height']) - url = streams[-1]['url'] - - type, ext, size = url_info(url, faker=True) - - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge=merge, faker=True) - ''' site = VimeoExtractor() site.download_by_vid(id, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/youku.py new/you-get-0.4.1730/src/you_get/extractors/youku.py --- old/you-get-0.4.1718/src/you_get/extractors/youku.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/youku.py 2024-08-19 21:53:12.000000000 +0200 @@ -242,7 +242,7 @@ def youku_download_playlist_by_url(url, **kwargs): video_page_pt = 'https?://v.youku.com/v_show/id_([A-Za-z0-9=]+)' - js_cb_pt = '\(({.+})\)' + js_cb_pt = r'\(({.+})\)' if re.match(video_page_pt, url): youku_obj = Youku() youku_obj.url = url @@ -272,14 +272,14 @@ page = get_content(url) show_id = re.search(r'showid:"(\d+)"', page).group(1) ep = 'http://list.youku.com/show/module?id={}&tab=showInfo&callback=jQuery'.format(show_id) - xhr_page = get_content(ep).replace('\/', '/').replace('\"', '"') + xhr_page = get_content(ep).replace(r'\/', '/').replace(r'\"', '"') video_url = re.search(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_page).group(1) youku_download_playlist_by_url('http://'+video_url, **kwargs) return - elif re.match('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url): + elif re.match(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url): # http://list.youku.com/albumlist/show/id_2336634.html # UGC playlist - list_id = re.search('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1) + list_id = re.search(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1) ep = 'http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp6' first_u = ep.format(list_id, 1) @@ -294,7 +294,7 @@ for i in range(2, req_cnt+2): req_u = ep.format(list_id, i) xhr_page = get_content(req_u) - json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace('\/', '/')) + json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace(r'\/', '/')) xhr_html = json_data['html'] page_videos = re.findall(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_html) v_urls.extend(page_videos) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/extractors/youtube.py new/you-get-0.4.1730/src/you_get/extractors/youtube.py --- old/you-get-0.4.1718/src/you_get/extractors/youtube.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/extractors/youtube.py 2024-08-19 21:53:12.000000000 +0200 @@ -175,7 +175,19 @@ pass # FIXME: show DASH stream sizes (by default) for playlist videos + def check_playability_response(self, ytInitialPlayerResponse): + STATUS_OK = "OK" + + playerResponseStatus = ytInitialPlayerResponse["playabilityStatus"]["status"] + if playerResponseStatus != STATUS_OK: + reason = ytInitialPlayerResponse["playabilityStatus"].get("reason", "") + raise AssertionError( + f"Server refused to provide video details. Returned status: {playerResponseStatus}, reason: {reason}." + ) + def prepare(self, **kwargs): + self.ua = 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36' + assert self.url or self.vid if not self.vid and self.url: @@ -185,15 +197,15 @@ self.download_playlist_by_url(self.url, **kwargs) exit(0) - if re.search('\Wlist=', self.url) and not kwargs.get('playlist'): + if re.search(r'\Wlist=', self.url) and not kwargs.get('playlist'): log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)') # Extract from video page logging.debug('Extracting from the video page...') - video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) + video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid, headers={'User-Agent': self.ua}) try: - jsUrl = re.search('([^"]*/base\.js)"', video_page).group(1) + jsUrl = re.search(r'([^"]*/base\.js)"', video_page).group(1) except: log.wtf('[Failed] Unable to find base.js on the video page') self.html5player = 'https://www.youtube.com' + jsUrl @@ -201,7 +213,8 @@ self.js = get_content(self.html5player).replace('\n', ' ') logging.debug('Loading ytInitialPlayerResponse...') - ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|</script>)', video_page).group(1)) + ytInitialPlayerResponse = json.loads(re.search(r'ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|</script>|var )', video_page).group(1)) + self.check_playability_response(ytInitialPlayerResponse) # Get the video title self.title = ytInitialPlayerResponse["videoDetails"]["title"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/src/you_get/version.py new/you-get-0.4.1730/src/you_get/version.py --- old/you-get-0.4.1718/src/you_get/version.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/src/you_get/version.py 2024-08-19 21:53:12.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1718' +__version__ = '0.4.1730' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1718/tests/test.py new/you-get-0.4.1730/tests/test.py --- old/you-get-0.4.1718/tests/test.py 2024-07-14 08:29:25.000000000 +0200 +++ new/you-get-0.4.1730/tests/test.py 2024-08-19 21:53:12.000000000 +0200 @@ -27,18 +27,18 @@ info_only=True ) - def test_youtube(self): - youtube.download( - 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True - ) + #def test_youtube(self): + #youtube.download( + # 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True + #) #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) #youtube.download( # 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa # info_only=True #) - youtube.download( - 'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True - ) + #youtube.download( + # 'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True + #) def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac44560432', info_only=True) @@ -57,12 +57,14 @@ #) def test_tiktok(self): - tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True) - tiktok.download('https://www.tiktok.com/@/video/6850796940293164290', info_only=True) - tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True) + tiktok.download('https://www.tiktok.com/@zukky_48/video/7398162058153315605', info_only=True) + tiktok.download('https://www.tiktok.com/@/video/7398162058153315605', info_only=True) + tiktok.download('https://t.tiktok.com/i18n/share/video/7398162058153315605/', info_only=True) + tiktok.download('https://vt.tiktok.com/ZSYKjKt6M/', info_only=True) def test_twitter(self): twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True) + twitter.download('https://x.com/elonmusk/status/1530516552084234244', info_only=True) def test_weibo(self): miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True)