[extractor/common] improve Akamai HTTP format extraction

- Allow m3u8 manifest without an additional audio format
- Fix extraction for qualities starting with a number
  Solution provided by @nixxo based on: https://stackoverflow.com/a/5984688
This commit is contained in:
Remita Amine 2020-12-02 21:37:14 +01:00
parent 64554c12e1
commit 664dd8ba85
1 changed file with 2 additions and 2 deletions

View File

@@ -2623,7 +2623,7 @@ class InfoExtractor(object):
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+' REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities) qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1): if len(formats) in (qualities_length, qualities_length + 1, qualities_length * 2, qualities_length * 2 + 1):
i = 0 i = 0
http_formats = [] http_formats = []
for f in formats: for f in formats:
@@ -2632,7 +2632,7 @@ class InfoExtractor(object):
http_f = f.copy() http_f = f.copy()
del http_f['manifest_url'] del http_f['manifest_url']
http_url = re.sub( http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url']) REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({ http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url, 'url': http_url,