Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package you-get for openSUSE:Factory checked in at 2021-07-12 01:25:13 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/you-get (Old) and /work/SRC/openSUSE:Factory/.you-get.new.2625 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get" Mon Jul 12 01:25:13 2021 rev:39 rq:905695 version:0.4.1536 Changes: -------- --- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2021-06-02 22:12:31.060111934 +0200 +++ /work/SRC/openSUSE:Factory/.you-get.new.2625/you-get.changes 2021-07-12 01:25:34.452983300 +0200 @@ -1,0 +2,5 @@ +Sun Jul 11 17:18:48 UTC 2021 - Luigi Baldoni <aloi...@gmx.com> + +- Update to version 0.4.1536 (no changelog) + +------------------------------------------------------------------- Old: ---- you-get-0.4.1527.tar.gz New: ---- you-get-0.4.1536.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ you-get.spec ++++++ --- /var/tmp/diff_new_pack.mV2ahA/_old 2021-07-12 01:25:34.920979702 +0200 +++ /var/tmp/diff_new_pack.mV2ahA/_new 2021-07-12 01:25:34.924979671 +0200 @@ -17,7 +17,7 @@ Name: you-get -Version: 0.4.1527 +Version: 0.4.1536 Release: 0 Summary: Dumb downloader that scrapes the web License: MIT ++++++ you-get-0.4.1527.tar.gz -> you-get-0.4.1536.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/common.py new/you-get-0.4.1536/src/you_get/common.py --- old/you-get-0.4.1527/src/you_get/common.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/common.py 2021-07-11 18:46:41.000000000 +0200 @@ -433,8 +433,17 @@ req = request.Request(url, headers=headers) if cookies: - cookies.add_cookie_header(req) - req.headers.update(req.unredirected_hdrs) + # NOTE: Do not use cookies.add_cookie_header(req) + # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10 + # See also: + # - https://github.com/python/cpython/pull/17471 + # - https://bugs.python.org/issue2190 + # Here we add cookies to the request headers manually + cookie_strings = [] + for cookie in list(cookies): + cookie_strings.append(cookie.name + '=' + cookie.value) + cookie_headers = 
{'Cookie': '; '.join(cookie_strings)} + req.headers.update(cookie_headers) response = urlopen_with_retry(req) data = response.read() @@ -477,8 +486,17 @@ req = request.Request(url, headers=headers) if cookies: - cookies.add_cookie_header(req) - req.headers.update(req.unredirected_hdrs) + # NOTE: Do not use cookies.add_cookie_header(req) + # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10 + # See also: + # - https://github.com/python/cpython/pull/17471 + # - https://bugs.python.org/issue2190 + # Here we add cookies to the request headers manually + cookie_strings = [] + for cookie in list(cookies): + cookie_strings.append(cookie.name + '=' + cookie.value) + cookie_headers = {'Cookie': '; '.join(cookie_strings)} + req.headers.update(cookie_headers) if kwargs.get('post_data_raw'): post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8') else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/douyin.py new/you-get-0.4.1536/src/you_get/extractors/douyin.py --- old/you-get-0.4.1527/src/you_get/extractors/douyin.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/douyin.py 2021-07-11 18:46:41.000000000 +0200 @@ -2,6 +2,7 @@ import re import json +from urllib.parse import unquote from ..common import ( url_size, @@ -18,17 +19,17 @@ def douyin_download_by_url(url, **kwargs): page_content = get_content(url, headers=fake_headers) - match_rule = re.compile(r'var data = \[(.*?)\];') - video_info = json.loads(match_rule.findall(page_content)[0]) - video_url = video_info['video']['play_addr']['url_list'][0] - # fix: https://www.douyin.com/share/video/6553248251821165832 - # if there is no title, use desc - cha_list = video_info['cha_list'] - if cha_list: - title = cha_list[0]['cha_name'] - else: - title = video_info['desc'] + # The video player and video source are rendered client-side, the data + # contains in a 
<script id="RENDER_DATA" type="application/json"> tag + # quoted, unquote the whole page content then search using regex with + # regular string. + page_content = unquote(page_content) + title = re.findall(r'"desc":"([^"]*)"', page_content)[0].strip() video_format = 'mp4' + # video URLs are in this pattern {"src":"THE_URL"}, in json format + urls_pattern = r'"playAddr":(\[.*?\])' + urls = json.loads(re.findall(urls_pattern, page_content)[0]) + video_url = 'https:' + urls[0]['src'] size = url_size(video_url, faker=True) print_info( site_info='douyin.com', title=title, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/instagram.py new/you-get-0.4.1536/src/you_get/extractors/instagram.py --- old/you-get-0.4.1527/src/you_get/extractors/instagram.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/instagram.py 2021-07-11 18:46:41.000000000 +0200 @@ -6,14 +6,14 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = r1(r'([^?]*)', url) - html = get_html(url, faker=True) + cont = get_content(url, headers=fake_headers) vid = r1(r'instagram.com/\w+/([^/]+)', url) - description = r1(r'<meta property="og:title" content="([^"]*)"', html) or \ - r1(r'<title>\s([^<]*)</title>', html) # with logged-in cookies + description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \ + r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies title = "{} [{}]".format(description.replace("\n", " "), vid) - stream = r1(r'<meta property="og:video" content="([^"]*)"', html) + stream = r1(r'<meta property="og:video" content="([^"]*)"', cont) if stream: _, ext, size = url_info(stream) @@ -21,14 +21,14 @@ if not info_only: download_urls([stream], title, ext, size, output_dir, merge=merge) else: - data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html) + data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', 
cont) try: info = json.loads(data.group(1)) post = info['entry_data']['PostPage'][0] assert post except: # with logged-in cookies - data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html) + data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont) if data is not None: log.e('[Warning] Cookies needed.') post = json.loads(data.group(1)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/miaopai.py new/you-get-0.4.1536/src/you_get/extractors/miaopai.py --- old/you-get-0.4.1527/src/you_get/extractors/miaopai.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/miaopai.py 2021-07-11 18:46:41.000000000 +0200 @@ -19,7 +19,7 @@ def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs): '''Source: Android mobile''' - page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4' + page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4' mobile_page = get_content(page_url, headers=fake_headers_mobile) url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W') @@ -78,6 +78,51 @@ download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs) +def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs): + oid = match1(url, r'/show/(\d{4}:\w+)') + page = "/show/%s" % oid + data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({ + 'page': page + }) + headers = {} + headers.update(fake_headers_mobile) + headers['origin'] = 'https://h5.video.weibo.com' + headers['page-referer'] = page + headers['referer'] = 'https://h5.video.weibo.com/show/%s' % oid + post_data = { + "data": json.dumps({ + "Component_Play_Playinfo": {"oid": oid} + }) + } + data_content = post_content(data_url, headers=headers, post_data=post_data) + data = json.loads(data_content) + if 
data['msg'] != 'succ': + raise Exception('Weibo api returns non-success: (%s)%s'.format(data['code'], data['msg'])) + + play_info = data['data']['Component_Play_Playinfo'] + title = play_info['title'] + + # get video formats and sort by size desc + video_formats = [] + for fmt, relative_uri in play_info['urls'].items(): + url = "https:%s" % relative_uri + type, ext, size = url_info(url, headers=headers) + video_formats.append({ + 'fmt': fmt, + 'url': url, + 'type': type, + 'ext': ext, + 'size': size, + }) + video_formats.sort(key=lambda v:v['size'], reverse=True) + selected_video = video_formats[0] + video_url, ext, size = selected_video['url'], selected_video['ext'], selected_video['size'] + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([video_url], fs.legitimize(title), ext, total_size=size, headers=headers, **kwargs) + + def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, **kwargs): mobile_page = get_content(url, headers=fake_headers_mobile) try: @@ -108,12 +153,16 @@ if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url): return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url): + return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) + fid = match1(url, r'\?fid=(\d{4}:\w+)') if fid is not None: miaopai_download_by_fid(fid, output_dir, merge, info_only) elif '/p/230444' in url: fid = match1(url, r'/p/230444(\w+)') miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only) + pass else: mobile_page = get_content(url, headers = fake_headers_mobile) hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/extractors/missevan.py new/you-get-0.4.1536/src/you_get/extractors/missevan.py --- 
old/you-get-0.4.1527/src/you_get/extractors/missevan.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/extractors/missevan.py 2021-07-11 18:46:41.000000000 +0200 @@ -75,17 +75,13 @@ raise _NoMatchException() missevan_stream_types = [ - {'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl', - 'resource_url_fmt': 'sound/{resource_url}'}, - {'id': '320', 'quality': '320 Kbps', 'url_json_key': 'soundurl_64'}, + {'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl'}, {'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'}, - {'id': '32', 'quality': '32 Kbps', 'url_json_key': 'soundurl_32'}, {'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image', 'default_src': 'covers/nocover.png', 'resource_url_fmt': 'covers/{resource_url}'}, - {'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'cover_image', - 'default_src': 'coversmini/nocover.png', - 'resource_url_fmt': 'coversmini/{resource_url}'} + {'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'front_cover', + 'default_src': 'coversmini/nocover.png'} ] def _get_resource_uri(data, stream_type): @@ -353,7 +349,7 @@ @staticmethod def url_resource(uri): - return 'https://static.missevan.com/' + uri + return uri if re.match(r'^https?:/{2}\w.+$', uri) else 'https://static.missevan.com/' + uri site = MissEvan() site_info = 'MissEvan.com' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1527/src/you_get/version.py new/you-get-0.4.1536/src/you_get/version.py --- old/you-get-0.4.1527/src/you_get/version.py 2021-06-01 18:33:26.000000000 +0200 +++ new/you-get-0.4.1536/src/you_get/version.py 2021-07-11 18:46:41.000000000 +0200 @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1527' +__version__ = '0.4.1536'