[extractor/common] add generic support for akamai http format extraction

This commit is contained in:
Remita Amine 2020-11-22 12:54:55 +01:00
parent c4cabf040e
commit 193422e12a
1 changed files with 27 additions and 0 deletions

View File

@ -2596,6 +2596,7 @@ class InfoExtractor(object):
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = [] formats = []
hdcore_sign = 'hdcore=3.7.0' hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hds_host = hosts.get('hds') hds_host = hosts.get('hds')
@ -2608,6 +2609,7 @@ class InfoExtractor(object):
for entry in f4m_formats: for entry in f4m_formats:
entry.update({'extra_param_to_segment_url': hdcore_sign}) entry.update({'extra_param_to_segment_url': hdcore_sign})
formats.extend(f4m_formats) formats.extend(f4m_formats)
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
hls_host = hosts.get('hls') hls_host = hosts.get('hls')
if hls_host: if hls_host:
@ -2615,6 +2617,31 @@ class InfoExtractor(object):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
http_host = hosts.get('http')
if http_host and 'hdnea=' not in manifest_url:
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
i = 0
http_formats = []
for f in formats:
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
for protocol in ('http', 'https'):
http_f = f.copy()
del http_f['manifest_url']
http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url,
'protocol': protocol,
})
http_formats.append(http_f)
i += 1
formats.extend(http_formats)
return formats return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):