Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2021-07-12 01:25:13 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.2625 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Mon Jul 12 01:25:13 2021 rev:39 rq:905695 version:0.4.1536 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2021-06-02 22:12:31.060111934 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new.2625/you-get.changes 2021-07-12 01:25:34.452983300 +0200 @@ -1,0 +2,5 @@ +Sun Jul 11 17:18:48 UTC 2021 - Luigi Baldoni <aloi...@gmx.com> + +- Update to version 0.4.1536 (no changelog) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1527.tar.gz New: ---- you-get-0.4.1536.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.mV2ahA/_old 2021-07-12 01:25:34.920979702 +0200 +++ /var/tmp/diff_new_pack.mV2ahA/_new 2021-07-12 01:25:34.924979671 +0200 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1527 +Version: 0.4.1536 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1527.tar.gz -> you-get-0.4.1536.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/common.py new/you-get-0.4.1536/src/you_get/common.py --- old/you-get-0.4.1527/src/you_get/common.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/common.py 2021-07-11 18:46:41.000000000 +0200 @@ -433,8 +433,17 @@ req = request.Request(url, headers=headers) if cookies: - cookies.add_cookie_header(req) - req.headers.update(req.unredirected_hdrs) + # NOTE: Do not use cookies.add_cookie_header(req) + # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10 + # See also: + # - https://github.com/python/cpython/pull/17471 + # - https://bugs.python.org/issue2190 + # Here we add cookies to the request headers manually + cookie_strings = [] + for cookie in list(cookies): + cookie_strings.append(cookie.name + '=' + cookie.value) + cookie_headers = 
{'Cookie': '; '.join(cookie_strings)} + req.headers.update(cookie_headers) response = urlopen_with_retry(req) data = response.read() @@ -477,8 +486,17 @@ req = request.Request(url, headers=headers) if cookies: - cookies.add_cookie_header(req) - req.headers.update(req.unredirected_hdrs) + # NOTE: Do not use cookies.add_cookie_header(req) + # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10 + # See also: + # - https://github.com/python/cpython/pull/17471 + # - https://bugs.python.org/issue2190 + # Here we add cookies to the request headers manually + cookie_strings = [] + for cookie in list(cookies): + cookie_strings.append(cookie.name + '=' + cookie.value) + cookie_headers = {'Cookie': '; '.join(cookie_strings)} + req.headers.update(cookie_headers) if kwargs.get('post_data_raw'): post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8') else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/douyin.py new/you-get-0.4.1536/src/you_get/extractors/douyin.py --- old/you-get-0.4.1527/src/you_get/extractors/douyin.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/douyin.py 2021-07-11 18:46:41.000000000 +0200 @@ -2,6 +2,7 @@ import re import json +from urllib.parse import unquote from ..common import ( url_size, @@ -18,17 +19,17 @@ def douyin_download_by_url(url, **kwargs): page_content = get_content(url, headers=fake_headers) - match_rule = re.compile(r'var data = \[(.*?)\];') - video_info = json.loads(match_rule.findall(page_content)[0]) - video_url = video_info['video']['play_addr']['url_list'][0] - # fix: https://www.douyin.com/share/video/6553248251821165832 - # if there is no title, use desc - cha_list = video_info['cha_list'] - if cha_list: - title = cha_list[0]['cha_name'] - else: - title = video_info['desc'] + # The video player and video source are rendered client-side, the data + # contains in a 
<script id="RENDER_DATA" type="application/json"> tag + # quoted, unquote the whole page content then search using regex with + # regular string. + page_content = unquote(page_content) + title = re.findall(r'"desc":"([^"]*)"', page_content)[0].strip() video_format = 'mp4' + # video URLs are in this pattern {"src":"THE_URL"}, in json format + urls_pattern = r'"playAddr":(\[.*?\])' + urls = json.loads(re.findall(urls_pattern, page_content)[0]) + video_url = 'https:' + urls[0]['src'] size = url_size(video_url, faker=True) print_info( site_info='douyin.com', title=title, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/instagram.py new/you-get-0.4.1536/src/you_get/extractors/instagram.py --- old/you-get-0.4.1527/src/you_get/extractors/instagram.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/instagram.py 2021-07-11 18:46:41.000000000 +0200 @@ -6,14 +6,14 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = r1(r'([^?]*)', url) - html = get_html(url, faker=True) + cont = get_content(url, headers=fake_headers) vid = r1(r'instagram.com/\w+/([^/]+)', url) - description = r1(r'<meta property="og:title" content="([^"]*)"', html) or \ - r1(r'<title>\s([^<]*)</title>', html) # with logged-in cookies + description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \ + r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies title = "{} [{}]".format(description.replace("\n", " "), vid) - stream = r1(r'<meta property="og:video" content="([^"]*)"', html) + stream = r1(r'<meta property="og:video" content="([^"]*)"', cont) if stream: _, ext, size = url_info(stream) @@ -21,14 +21,14 @@ if not info_only: download_urls([stream], title, ext, size, output_dir, merge=merge) else: - data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html) + data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', 
cont) try: info = json.loads(data.group(1)) post = info['entry_data']['PostPage'][0] assert post except: # with logged-in cookies - data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html) + data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont) if data is not None: log.e('[Warning] Cookies needed.') post = json.loads(data.group(1)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/miaopai.py new/you-get-0.4.1536/src/you_get/extractors/miaopai.py --- old/you-get-0.4.1527/src/you_get/extractors/miaopai.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/miaopai.py 2021-07-11 18:46:41.000000000 +0200 @@ -19,7 +19,7 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs): '''Source: Android mobile''' - page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4' + page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4' mobile_page = get_content(page_url, headers=fake_headers_mobile) url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W') @@ -78,6 +78,51 @@ download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs) +def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs): + oid = match1(url, r'/show/(\d{4}:\w+)') + page = "/show/%s" % oid + data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({ + 'page': page + }) + headers = {} + headers.update(fake_headers_mobile) + headers['origin'] = 'https://h5.video.weibo.com' + headers['page-referer'] = page + headers['referer'] = 'https://h5.video.weibo.com/show/%s' % oid + post_data = { + "data": json.dumps({ + "Component_Play_Playinfo": {"oid": oid} + }) + } + data_content = post_content(data_url, headers=headers, post_data=post_data) + data = json.loads(data_content) + if 
data['msg'] != 'succ': + raise Exception('Weibo api returns non-success: (%s)%s'.format(data['code'], data['msg'])) + + play_info = data['data']['Component_Play_Playinfo'] + title = play_info['title'] + + # get video formats and sort by size desc + video_formats = [] + for fmt, relative_uri in play_info['urls'].items(): + url = "https:%s" % relative_uri + type, ext, size = url_info(url, headers=headers) + video_formats.append({ + 'fmt': fmt, + 'url': url, + 'type': type, + 'ext': ext, + 'size': size, + }) + video_formats.sort(key=lambda v:v['size'], reverse=True) + selected_video = video_formats[0] + video_url, ext, size = selected_video['url'], selected_video['ext'], selected_video['size'] + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([video_url], fs.legitimize(title), ext, total_size=size, headers=headers, **kwargs) + + def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, **kwargs): mobile_page = get_content(url, headers=fake_headers_mobile) try: @@ -108,12 +153,16 @@ if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url): return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url): + return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) elif '/p/230444' in url: fid = match1(url, r'/p/230444(\w+)') miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only) + pass else: mobile_page = get_content(url, headers = fake_headers_mobile) hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/missevan.py new/you-get-0.4.1536/src/you_get/extractors/missevan.py --- 
old/you-get-0.4.1527/src/you_get/extractors/missevan.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/missevan.py 2021-07-11 18:46:41.000000000 +0200 @@ -75,17 +75,13 @@ raise _NoMatchException() missevan_stream_types = [ - {'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl', - 'resource_url_fmt': 'sound/{resource_url}'}, - {'id': '320', 'quality': '320 Kbps', 'url_json_key': 'soundurl_64'}, + {'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl'}, {'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'}, - {'id': '32', 'quality': '32 Kbps', 'url_json_key': 'soundurl_32'}, {'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image', 'default_src': 'covers/nocover.png', 'resource_url_fmt': 'covers/{resource_url}'}, - {'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'cover_image', - 'default_src': 'coversmini/nocover.png', - 'resource_url_fmt': 'coversmini/{resource_url}'} + {'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'front_cover', + 'default_src': 'coversmini/nocover.png'} ] def _get_resource_uri(data, stream_type): @@ -353,7 +349,7 @@ @staticmethod def url_resource(uri): - return 'https://static.missevan.com/' + uri + return uri if re.match(r'^https?:/{2}\w.+$', uri) else 'https://static.missevan.com/' + uri site = MissEvan() site_info = 'MissEvan.com' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/version.py new/you-get-0.4.1536/src/you_get/version.py --- old/you-get-0.4.1527/src/you_get/version.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/version.py 2021-07-11 18:46:41.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1527' +__version__ = '0.4.1536'