[sproutvideo] Fix extractor

2019-08-05 22:49:13 +02:00 · 2019-08-05 22:49:13 +02:00 · 36071a8d06
parent 662087e491
commit 36071a8d06
5 changed files with 34 additions and 34 deletions
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -43,9 +43,6 @@ def get_suitable_downloader(info_dict, params={}):
        if ed.can_download(info_dict):
            return ed

-    if info_dict.get('force_hlsdl') is True:
-        return HlsFD
-
    if protocol.startswith('m3u8') and info_dict.get('is_live'):
        return FFmpegFD

--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -121,8 +121,6 @@ class FragmentFD(FileDownloader):
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
-        if 'hls' not in ctx:
-            ctx['hls'] = False
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
@@ -145,7 +143,6 @@ class FragmentFD(FileDownloader):
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
-                'hls': ctx['hls'],
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -105,7 +105,6 @@ class HlsFD(FragmentFD):
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
-            'hls': '#EXT-X-KEY:METHOD=AES-128' in s,
        }

        self._prepare_and_start_frag_download(ctx)
@@ -176,8 +175,10 @@ class HlsFD(FragmentFD):
                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                            self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
-                        frag_content = AES.new(
-                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+                        # We don't decrypt fragments during the test
+                        if not test:
+                            frag_content = AES.new(
+                                decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                    self._append_fragment(ctx, frag_content)
                    # We only download the first fragment during the test
                    if test:
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -45,8 +45,7 @@ class HttpFD(FileDownloader):
            headers.update(add_headers)

        is_test = self.params.get('test', False)
-        is_hls = self.params.get('hls', False)
-        chunk_size = self._TEST_FILE_SIZE if is_test and not is_hls else (
+        chunk_size = self._TEST_FILE_SIZE if is_test else (
            info_dict.get('downloader_options', {}).get('http_chunk_size')
            or self.params.get('http_chunk_size') or 0)

@@ -195,8 +194,7 @@ class HttpFD(FileDownloader):
            # However, for a test we still would like to download just a piece of a file.
            # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
            # block size when downloading a file.
-            # If we are using HLS we cannot cut the fragment because it will break the decryption.
-            if is_test and not is_hls and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+            if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
                data_len = self._TEST_FILE_SIZE

            if data_len is not None:
--- a/youtube_dl/extractor/sproutvideo.py
+++ b/youtube_dl/extractor/sproutvideo.py
@@ -12,10 +12,11 @@ from ..compat import (


 class SproutVideoIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:|)//videos.sproutvideo.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+\??.*'
+    _NOSCHEMA_URL = r'//videos.sproutvideo.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+'
+    _VALID_URL = r'https?:%s' % _NOSCHEMA_URL
    _TEST = {
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
-        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
+        'md5': 'fbc675bb97437e797d11d14d99563f50',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
@@ -26,9 +27,14 @@ class SproutVideoIE(InfoExtractor):
    @staticmethod
    def _extract_url(webpage):
        sproutvideo = re.search(
-            r'(?:<iframe\s+class=[\'\"]sproutvideo-player.*src|href)=[\'\"](?P<url>%s)[\'\"]' % SproutVideoIE._VALID_URL, webpage)
+            r'(?:<iframe\s+class=[\'\"]sproutvideo-player.*src|href)=[\'\"](?P<url>(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, webpage)
        if sproutvideo:
-            return sproutvideo.group('url')
+            video_url = sproutvideo.group('url')
+            # Fix the video URL if the iframe doesn't have a defined schema
+            if video_url[:2] == '//':
+                video_url = 'https:' + video_url
+            return video_url
+

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -42,30 +48,31 @@ class SproutVideoIE(InfoExtractor):
        # signature->m for manifests
        # signature->k for keys
        # signature->t for segments
-        m_sig = self._policy_to_qs(parsed_data, 'm')
-        k_sig = self._policy_to_qs(parsed_data, 'k')
-        t_sig = self._policy_to_qs(parsed_data, 't')
+        m_sign = self._policy_to_qs(parsed_data, 'm')
+        k_sign = self._policy_to_qs(parsed_data, 'k')
+        t_sign = self._policy_to_qs(parsed_data, 't')

-        url = "https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}"
-        url = url.format(parsed_data['base'],
-                         parsed_data['s3_user_hash'],
-                         parsed_data['s3_video_hash'],
-                         m_sig)
+        resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}'
+        resource_url = resource_url.format(parsed_data['base'],
+                                           parsed_data['s3_user_hash'],
+                                           parsed_data['s3_video_hash'],
+                                           m_sign)

-        formats = self._extract_m3u8_formats(url, video_id, 'mp4', 'm3u8_native',
+        formats = self._extract_m3u8_formats(resource_url, video_id, 'mp4', entry_protocol='m3u8_native',
                                             m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

-        for i in range(len(formats)):
-            formats[i]['url'] = "{}?{}".format(formats[i]['url'], m_sig)
+        for entry in formats:
+            entry.update({
+                'url': '{0}?{1}'.format(entry['url'], m_sign),
+                'extra_param_to_segment_url': t_sign,
+                'extra_param_to_key_url': k_sign,
+            })

        return {
            'id': video_id,
            'title': parsed_data['title'],
            'formats': formats,
-            'force_hlsdl': True,  # currently FFmpeg is not supported
-            'extra_param_to_segment_url': t_sig,
-            'extra_param_to_key_url': k_sig
        }

    def _format_qsdata(self, qs_data):
@@ -75,6 +82,6 @@ class SproutVideoIE(InfoExtractor):
        return parsed_dict

    def _policy_to_qs(self, policy, key):
-        sig = self._format_qsdata(policy['signatures'][key])
-        sig['sessionID'] = policy['sessionID']
-        return compat_urllib_parse_urlencode(sig, doseq=True)
+        sign = self._format_qsdata(policy['signatures'][key])
+        sign['sessionID'] = policy['sessionID']
+        return compat_urllib_parse_urlencode(sign, doseq=True)