Hello community,

here is the log from the commit of package you-get for openSUSE:Factory checked in at 2018-09-13 12:12:14
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/you-get (Old)
 and      /work/SRC/openSUSE:Factory/.you-get.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "you-get"

Thu Sep 13 12:12:14 2018 rev:11 rq:635271 version:0.4.1148

Changes:
--------
--- /work/SRC/openSUSE:Factory/you-get/you-get.changes 2018-09-11 17:18:10.695329035 +0200
+++ /work/SRC/openSUSE:Factory/.you-get.new/you-get.changes 2018-09-13 12:14:00.846124633 +0200
@@ -1,0 +2,5 @@
+Wed Sep 12 07:22:55 UTC 2018 - [email protected]
+
+- Update to version 0.4.1148 (no changelog supplied)
+
+-------------------------------------------------------------------

Old:
----
  you-get-0.4.1140.tar.gz

New:
----
  you-get-0.4.1148.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ you-get.spec ++++++
--- /var/tmp/diff_new_pack.WfSn4Q/_old 2018-09-13 12:14:01.218124212 +0200
+++ /var/tmp/diff_new_pack.WfSn4Q/_new 2018-09-13 12:14:01.222124207 +0200
@@ -17,7 +17,7 @@
 
 Name:           you-get
-Version:        0.4.1140
+Version:        0.4.1148
 Release:        0
 Summary:        Dumb downloader that scrapes the web
 License:        MIT

++++++ you-get-0.4.1140.tar.gz -> you-get-0.4.1148.tar.gz ++++++

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/common.py new/you-get-0.4.1148/src/you_get/common.py
--- old/you-get-0.4.1140/src/you_get/common.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/common.py 2018-09-11 23:51:43.000000000 +0200
@@ -439,7 +439,7 @@
     return data
 
 
-def post_content(url, headers={}, post_data={}, decoded=True):
+def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
     """Post the content of a URL via sending a HTTP POST request.
 
     Args:
@@ -450,14 +450,19 @@
     Returns:
         The content as a string.
     """
-
-    logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
+    if kwargs.get('post_data_raw'):
+        logging.debug('post_content: %s\npost_data_raw: %s' % (url, kwargs['post_data_raw']))
+    else:
+        logging.debug('post_content: %s\npost_data: %s' % (url, post_data))
 
     req = request.Request(url, headers=headers)
     if cookies:
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
-    post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
+    if kwargs.get('post_data_raw'):
+        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
+    else:
+        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
     response = urlopen_with_retry(req, data=post_data_enc)
     data = response.read()
 
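Note on the post_content() hunk above: the new **kwargs path lets a caller hand over a raw request body instead of a form dict. A minimal usage sketch (the host and payloads below are illustrative only, not taken from the diff):

    from you_get.common import post_content

    # Old behaviour, unchanged: the dict is urlencoded before sending.
    post_content('https://example.invalid/login',
                 post_data={'user': 'alice', 'pass': 'secret'})

    # New behaviour: post_data_raw is sent verbatim as a UTF-8 body,
    # which allows JSON payloads (used by the Tumblr extractor below).
    post_content('https://example.invalid/api',
                 headers={'Content-Type': 'application/json'},
                 post_data_raw='{"eu_resident": true}')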
@@ -602,7 +607,12 @@
     # the key must be 'Referer' for the hack here
     if refer is not None:
         tmp_headers['Referer'] = refer
-    file_size = url_size(url, faker=faker, headers=tmp_headers)
+    if type(url) is list:
+        file_size = urls_size(url, faker=faker, headers=tmp_headers)
+        is_chunked, urls = True, url
+    else:
+        file_size = url_size(url, faker=faker, headers=tmp_headers)
+        is_chunked, urls = False, [url]
 
     continue_renameing = True
     while continue_renameing:
@@ -655,70 +665,78 @@
         else:
             open_mode = 'wb'
 
-        if received < file_size:
-            if faker:
-                tmp_headers = fake_headers
-            '''
-            if parameter headers passed in, we have it copied as tmp_header
-            elif headers:
-                headers = headers
-            else:
-                headers = {}
-            '''
-            if received:
-                tmp_headers['Range'] = 'bytes=' + str(received) + '-'
-            if refer:
-                tmp_headers['Referer'] = refer
-
-            if timeout:
-                response = urlopen_with_retry(
-                    request.Request(url, headers=tmp_headers), timeout=timeout
-                )
-            else:
-                response = urlopen_with_retry(
-                    request.Request(url, headers=tmp_headers)
-                )
-            try:
-                range_start = int(
-                    response.headers[
-                        'content-range'
-                    ][6:].split('/')[0].split('-')[0]
-                )
-                end_length = int(
-                    response.headers['content-range'][6:].split('/')[1]
-                )
-                range_length = end_length - range_start
-            except:
-                content_length = response.headers['content-length']
-                range_length = int(content_length) if content_length is not None \
-                    else float('inf')
-
-            if file_size != received + range_length:
-                received = 0
-                if bar:
-                    bar.received = 0
-                open_mode = 'wb'
-
-            with open(temp_filepath, open_mode) as output:
-                while True:
-                    buffer = None
-                    try:
-                        buffer = response.read(1024 * 256)
-                    except socket.timeout:
-                        pass
-                    if not buffer:
-                        if received == file_size:  # Download finished
-                            break
-                        # Unexpected termination. Retry request
-                        tmp_headers['Range'] = 'bytes=' + str(received) + '-'
-                        response = urlopen_with_retry(
-                            request.Request(url, headers=tmp_headers)
-                        )
-                        continue
-                    output.write(buffer)
-                    received += len(buffer)
+        for url in urls:
+            received_chunk = 0
+            if received < file_size:
+                if faker:
+                    tmp_headers = fake_headers
+                '''
+                if parameter headers passed in, we have it copied as tmp_header
+                elif headers:
+                    headers = headers
+                else:
+                    headers = {}
+                '''
+                if received and not is_chunked:  # only request a range when not chunked
+                    tmp_headers['Range'] = 'bytes=' + str(received) + '-'
+                if refer:
+                    tmp_headers['Referer'] = refer
+
+                if timeout:
+                    response = urlopen_with_retry(
+                        request.Request(url, headers=tmp_headers), timeout=timeout
+                    )
+                else:
+                    response = urlopen_with_retry(
+                        request.Request(url, headers=tmp_headers)
+                    )
+                try:
+                    range_start = int(
+                        response.headers[
+                            'content-range'
+                        ][6:].split('/')[0].split('-')[0]
+                    )
+                    end_length = int(
+                        response.headers['content-range'][6:].split('/')[1]
+                    )
+                    range_length = end_length - range_start
+                except:
+                    content_length = response.headers['content-length']
+                    range_length = int(content_length) if content_length is not None \
+                        else float('inf')
+
+                if is_chunked:  # always append if chunked
+                    open_mode = 'ab'
+                elif file_size != received + range_length:  # is it ever necessary?
+                    received = 0
                     if bar:
-                        bar.update_received(len(buffer))
+                        bar.received = 0
+                    open_mode = 'wb'
+
+                with open(temp_filepath, open_mode) as output:
+                    while True:
+                        buffer = None
+                        try:
+                            buffer = response.read(1024 * 256)
+                        except socket.timeout:
+                            pass
+                        if not buffer:
+                            if is_chunked and received_chunk == range_length:
+                                break
+                            elif not is_chunked and received == file_size:  # Download finished
+                                break
+                            # Unexpected termination. Retry request
+                            if not is_chunked:  # when
+                                tmp_headers['Range'] = 'bytes=' + str(received) + '-'
+                            response = urlopen_with_retry(
+                                request.Request(url, headers=tmp_headers)
+                            )
+                            continue
+                        output.write(buffer)
+                        received += len(buffer)
+                        received_chunk += len(buffer)
+                        if bar:
+                            bar.update_received(len(buffer))
 
         assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
             received, os.path.getsize(temp_filepath), temp_filepath
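Note on the url_save() rework above: a list-valued url now marks a pre-sliced ("chunked") download. The total size comes from urls_size(), each chunk is opened in append mode ('ab'), no Range header is sent, and per-chunk completion is checked with received_chunk == range_length. Stripped of the retry, resume, and progress-bar machinery of the real loop, the core idea is roughly this sketch (download_chunks is a hypothetical helper, not part of you-get):

    from urllib import request

    def download_chunks(urls, filepath, buf_size=256 * 1024):
        # Append each pre-sliced chunk in order. No Range header is needed:
        # every URL already carries its own &range=start-end bounds.
        received = 0
        with open(filepath, 'wb') as output:
            for url in urls:
                with request.urlopen(url) as response:
                    while True:
                        buf = response.read(buf_size)  # same 256 KiB reads as url_save
                        if not buf:
                            break
                        output.write(buf)
                        received += len(buf)
        return received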

diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/extractors/bilibili.py new/you-get-0.4.1148/src/you_get/extractors/bilibili.py
--- old/you-get-0.4.1140/src/you_get/extractors/bilibili.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/extractors/bilibili.py 2018-09-11 23:51:43.000000000 +0200
@@ -115,7 +115,7 @@
             self.url = 'http://www.bilibili.com/video/av{}/'.format(aid)
 
         self.ua = fake_headers['User-Agent']
-        self.url = url_locations([self.url])[0]
+        self.url = url_locations([self.url], faker=True)[0]
         frag = urllib.parse.urlparse(self.url).fragment
         # http://www.bilibili.com/video/av3141144/index_2.html#page=3
         if frag:
@@ -125,7 +125,7 @@
             aid = re.search(r'av(\d+)', self.url).group(1)
             self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(aid, page)
         self.referer = self.url
-        self.page = get_content(self.url)
+        self.page = get_content(self.url, headers=fake_headers)
 
         m = re.search(r'<h1.*?>(.*?)</h1>', self.page) or re.search(r'<h1 title="([^"]+)">', self.page)
         if m is not None:
@@ -381,7 +381,7 @@
 
 
 def bilibili_download_playlist_by_url(url, **kwargs):
-    url = url_locations([url])[0]
+    url = url_locations([url], faker=True)[0]
     kwargs['playlist'] = True
     # a bangumi here? possible?
     if 'live.bilibili' in url:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/extractors/tumblr.py new/you-get-0.4.1148/src/you_get/extractors/tumblr.py
--- old/you-get-0.4.1140/src/you_get/extractors/tumblr.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/extractors/tumblr.py 2018-09-11 23:51:43.000000000 +0200
@@ -13,7 +13,29 @@
         universal_download(url, output_dir, merge=merge, info_only=info_only)
         return
 
-    html = parse.unquote(get_html(url)).replace('\/', '/')
+    import ssl
+    ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
+    cookie_handler = request.HTTPCookieProcessor()
+    opener = request.build_opener(ssl_context, cookie_handler)
+    request.install_opener(opener)
+
+    page = get_html(url)
+    form_key = match1(page, r'id="tumblr_form_key" content="([^"]+)"')
+    if form_key is not None:
+        # bypass GDPR consent page
+        referer = 'https://www.tumblr.com/privacy/consent?redirect=%s' % parse.quote_plus(url)
+        post_content('https://www.tumblr.com/svc/privacy/consent',
+                     headers={
+                         'Content-Type': 'application/json',
+                         'User-Agent': fake_headers['User-Agent'],
+                         'Referer': referer,
+                         'X-tumblr-form-key': form_key,
+                         'X-Requested-With': 'XMLHttpRequest'
+                     },
+                     post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
+        page = get_html(url)
+
+    html = parse.unquote(page).replace('\/', '/')
 
     feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
     if feed in ['photo', 'photoset', 'entry'] or feed is None:
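Note on the tumblr.py hunk above: it works around Tumblr's GDPR interstitial in three steps: fetch the page, scrape the tumblr_form_key meta tag, POST a consent payload carrying that key (the cookie processor installed earlier keeps the resulting consent cookie), then fetch the page again. A compressed sketch of the same round-trip follows; the consent payload here is abbreviated and untested (the full one appears verbatim in the diff), and the blog URL is illustrative:

    import re
    from urllib import request

    def bypass_gdpr_consent(url, opener):
        # 1. The first fetch may land on the consent interstitial, which
        #    embeds a per-session form key in a <meta> tag.
        page = opener.open(url).read().decode('utf-8', 'ignore')
        m = re.search(r'id="tumblr_form_key" content="([^"]+)"', page)
        if m:
            # 2. POST the consent JSON with the scraped key; the opener's
            #    HTTPCookieProcessor stores the consent cookie it sets.
            req = request.Request(
                'https://www.tumblr.com/svc/privacy/consent',
                data=b'{"eu_resident":true,"gdpr_is_acceptable_age":true}',
                headers={'Content-Type': 'application/json',
                         'X-tumblr-form-key': m.group(1),
                         'X-Requested-With': 'XMLHttpRequest'})
            opener.open(req)
            # 3. Re-fetch: with the cookie set, the real page comes back.
            page = opener.open(url).read().decode('utf-8', 'ignore')
        return page

    opener = request.build_opener(request.HTTPCookieProcessor())
    html = bypass_gdpr_consent('https://staff.tumblr.com/', opener)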
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/extractors/universal.py new/you-get-0.4.1148/src/you_get/extractors/universal.py
--- old/you-get-0.4.1140/src/you_get/extractors/universal.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/extractors/universal.py 2018-09-11 23:51:43.000000000 +0200
@@ -106,6 +106,9 @@
                 title = '%s' % i
                 i += 1
 
+            if r1(r'(https://pinterest.com/pin/)', url):
+                continue
+
             candies.append({'url': url,
                             'title': title})
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/extractors/youtube.py new/you-get-0.4.1148/src/you_get/extractors/youtube.py
--- old/you-get-0.4.1140/src/you_get/extractors/youtube.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/extractors/youtube.py 2018-09-11 23:51:43.000000000 +0200
@@ -81,6 +81,16 @@
         exec(code, globals(), locals())
         return locals()['sig']
 
+    def chunk_by_range(url, size):
+        urls = []
+        chunk_size = 10485760
+        start, end = 0, chunk_size - 1
+        urls.append('%s&range=%s-%s' % (url, start, end))
+        while end + 1 < size:  # processed size < expected size
+            start, end = end + 1, end + chunk_size
+            urls.append('%s&range=%s-%s' % (url, start, end))
+        return urls
+
     def get_url_from_vid(vid):
         return 'https://youtu.be/{}'.format(vid)
 
@@ -290,13 +300,15 @@
                     if not dash_size:
                         try: dash_size = url_size(dash_url)
                         except: continue
+                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
+                    dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
                     self.dash_streams[itag] = {
                         'quality': '%sx%s' % (w, h),
                         'itag': itag,
                         'type': mimeType,
                         'mime': mimeType,
                         'container': 'mp4',
-                        'src': [dash_url, dash_mp4_a_url],
+                        'src': [dash_urls, dash_mp4_a_urls],
                         'size': int(dash_size) + int(dash_mp4_a_size)
                     }
                 elif mimeType == 'video/webm':
@@ -310,13 +322,15 @@
                     if not dash_size:
                         try: dash_size = url_size(dash_url)
                         except: continue
+                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
+                    dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
                     self.dash_streams[itag] = {
                         'quality': '%sx%s' % (w, h),
                         'itag': itag,
                         'type': mimeType,
                         'mime': mimeType,
                         'container': 'webm',
-                        'src': [dash_url, dash_webm_a_url],
+                        'src': [dash_urls, dash_webm_a_urls],
                         'size': int(dash_size) + int(dash_webm_a_size)
                     }
         except:
@@ -353,13 +367,15 @@
                         dash_url += '&signature={}'.format(sig)
                     dash_size = stream['clen']
                     itag = stream['itag']
+                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
+                    dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
                     self.dash_streams[itag] = {
                         'quality': stream['size'],
                         'itag': itag,
                         'type': mimeType,
                         'mime': mimeType,
                         'container': 'mp4',
-                        'src': [dash_url, dash_mp4_a_url],
+                        'src': [dash_urls, dash_mp4_a_urls],
                         'size': int(dash_size) + int(dash_mp4_a_size)
                     }
                 elif stream['type'].startswith('video/webm'):
@@ -378,13 +394,15 @@
                     except UnboundLocalError as e:
                         audio_url = dash_mp4_a_url
                         audio_size = int(dash_mp4_a_size)
+                    dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
+                    audio_urls = self.__class__.chunk_by_range(audio_url, int(audio_size))
                     self.dash_streams[itag] = {
                         'quality': stream['size'],
                         'itag': itag,
                         'type': mimeType,
                         'mime': mimeType,
                         'container': 'webm',
-                        'src': [dash_url, audio_url],
+                        'src': [dash_urls, audio_urls],
                         'size': int(dash_size) + int(audio_size)
                     }
 
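Note on the youtube.py hunks above: chunk_by_range() slices a DASH stream URL into 10 MiB &range=start-end windows, and the resulting URL lists flow into the chunked path of url_save() shown in common.py above, which downloads and appends them in order. A worked example for a hypothetical 25 MiB (26,214,400-byte) stream, with the function copied from the diff:

    def chunk_by_range(url, size):
        urls = []
        chunk_size = 10485760                # 10 MiB per slice
        start, end = 0, chunk_size - 1
        urls.append('%s&range=%s-%s' % (url, start, end))
        while end + 1 < size:                # processed size < expected size
            start, end = end + 1, end + chunk_size
            urls.append('%s&range=%s-%s' % (url, start, end))
        return urls

    # Three slices; the server truncates the last one at end of stream:
    #   ...&range=0-10485759
    #   ...&range=10485760-20971519
    #   ...&range=20971520-31457279
    print(chunk_by_range('https://example.invalid/videoplayback?sig=x', 26214400))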
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/you-get-0.4.1140/src/you_get/version.py new/you-get-0.4.1148/src/you_get/version.py
--- old/you-get-0.4.1140/src/you_get/version.py 2018-09-08 00:48:10.000000000 +0200
+++ new/you-get-0.4.1148/src/you_get/version.py 2018-09-11 23:51:43.000000000 +0200
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1140'
+__version__ = '0.4.1148'
