[ard] Extract all formats
This commit is contained in:
parent
1d18e26eca
commit
e37c92ec6d
|
@ -35,6 +35,87 @@ class ARDMediathekIE(InfoExtractor):
|
||||||
'skip': 'Blocked outside of Germany',
|
'skip': 'Blocked outside of Germany',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_media_info(self, media_info_url, webpage, video_id):
    """Download the media JSON and turn it into a partial info dict.

    The JSON at ``media_info_url`` is fetched, its formats are collected
    through ``self._extract_formats`` and then handed to
    ``self._sort_formats``.

    When no format was collected, an expected ``ExtractorError`` is raised
    if ``webpage`` contains the ``"fsk"`` marker or if the JSON has a truthy
    ``_geoblocked`` entry; otherwise control falls through to
    ``self._sort_formats`` with the empty list.

    Returns a dict with the keys ``id``, ``duration``, ``thumbnail``,
    ``formats`` and ``subtitles``.
    """
    media_json = self._download_json(
        media_info_url, video_id, 'Downloading media JSON')
    found_formats = self._extract_formats(media_json, video_id)

    if not found_formats:
        # Pick the most specific explanation for the missing formats.
        if '"fsk"' in webpage:
            raise ExtractorError(
                'This video is only available after 20:00', expected=True)
        if media_json.get('_geoblocked'):
            raise ExtractorError(
                'This video is not available due to geo restriction',
                expected=True)

    self._sort_formats(found_formats)

    info = {
        'id': video_id,
        'duration': int_or_none(media_json.get('_duration')),
        'thumbnail': media_json.get('_previewImage'),
        'formats': found_formats,
        'subtitles': {},
    }

    # A single subtitle track is exposed under the 'de' key when the JSON
    # provides a subtitle URL.
    caption_url = media_json.get('_subtitleUrl')
    if caption_url:
        info['subtitles']['de'] = [{'ext': 'srt', 'url': caption_url}]

    return info
|
||||||
|
|
||||||
|
def _extract_formats(self, media_info, video_id):
    """Collect format dicts for every stream listed in ``media_info``.

    Walks ``media_info['_mediaArray']`` and each entry's
    ``_mediaStreamArray``. A stream's ``_stream`` value may be a single URL
    or a list of URLs; streams without one are skipped.

    Per URL, dispatching on the extension reported by ``determine_ext``:
      * ``f4m``  -> expanded through ``self._extract_f4m_formats``
      * ``m3u8`` -> expanded through ``self._extract_m3u8_formats``
      * otherwise an RTMP entry is built when the stream's ``_server``
        starts with ``rtmp``, a direct entry when the URL starts with
        ``http``, and anything else is ignored.

    Returns the list of format dicts (possibly empty), unsorted.
    """
    is_audio = media_info.get('_type') == 'audio'
    collected = []

    for media_index, media_entry in enumerate(media_info.get('_mediaArray', [])):
        for stream_entry in media_entry.get('_mediaStreamArray', []):
            candidates = stream_entry.get('_stream')
            if not candidates:
                continue
            # Normalise the single-URL form to a one-element list.
            if not isinstance(candidates, list):
                candidates = [candidates]

            quality = stream_entry.get('_quality')
            server = stream_entry.get('_server')

            for candidate in candidates:
                ext = determine_ext(candidate)

                if ext == 'f4m':
                    collected.extend(self._extract_f4m_formats(
                        candidate + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
                        video_id, preference=-1, f4m_id='hds'))
                    continue

                if ext == 'm3u8':
                    collected.extend(self._extract_m3u8_formats(
                        candidate, video_id, 'mp4', preference=1, m3u8_id='hls'))
                    continue

                if server and server.startswith('rtmp'):
                    # For RTMP the server is the URL and the stream value
                    # is used as the play path.
                    fmt = {
                        'url': server,
                        'play_path': candidate,
                        'format_id': 'a%s-rtmp-%s' % (media_index, quality),
                    }
                elif candidate.startswith('http'):
                    fmt = {
                        'url': candidate,
                        'format_id': 'a%s-%s-%s' % (media_index, ext, quality)
                    }
                else:
                    continue

                # Pick up the frame size when the name ends in
                # "_<width>x<height>.mp4".
                size = re.search(
                    r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', candidate)
                if size:
                    fmt['width'] = int(size.group('width'))
                    fmt['height'] = int(size.group('height'))

                if is_audio:
                    fmt['vcodec'] = 'none'

                collected.append(fmt)

    return collected
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# determine video id from url
|
# determine video id from url
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
@ -92,46 +173,22 @@ class ARDMediathekIE(InfoExtractor):
|
||||||
'format_id': fid,
|
'format_id': fid,
|
||||||
'url': furl,
|
'url': furl,
|
||||||
})
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info = {
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
else: # request JSON file
|
else: # request JSON file
|
||||||
media_info = self._download_json(
|
info = self._extract_media_info(
|
||||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
|
||||||
# The second element of the _mediaArray contains the standard http urls
|
|
||||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
|
||||||
if not streams:
|
|
||||||
if '"fsk"' in webpage:
|
|
||||||
raise ExtractorError('This video is only available after 20:00')
|
|
||||||
|
|
||||||
formats = []
|
info.update({
|
||||||
for s in streams:
|
|
||||||
if type(s['_stream']) == list:
|
|
||||||
for index, url in enumerate(s['_stream'][::-1]):
|
|
||||||
quality = s['_quality'] + index
|
|
||||||
formats.append({
|
|
||||||
'quality': quality,
|
|
||||||
'url': url,
|
|
||||||
'format_id': '%s-%s' % (determine_ext(url), quality)
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
|
|
||||||
format = {
|
|
||||||
'quality': s['_quality'],
|
|
||||||
'url': s['_stream'],
|
|
||||||
}
|
|
||||||
|
|
||||||
format['format_id'] = '%s-%s' % (
|
|
||||||
determine_ext(format['url']), format['quality'])
|
|
||||||
|
|
||||||
formats.append(format)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in New Issue