[udemy] Extract asset captions

This commit is contained in:
Sergey M․ 2018-05-08 22:57:01 +07:00
parent 0ce76801e8
commit 2fbd86352e
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 17 additions and 1 deletion

View File

@@ -18,6 +18,7 @@ from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
sanitized_Request, sanitized_Request,
try_get,
unescapeHTML, unescapeHTML,
urlencode_postdata, urlencode_postdata,
) )
@@ -105,7 +106,7 @@ class UdemyIE(InfoExtractor):
% (course_id, lecture_id), % (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={ lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset', 'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,data', 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
}) })
def _handle_error(self, response): def _handle_error(self, response):
@@ -308,6 +309,21 @@ class UdemyIE(InfoExtractor):
if isinstance(urls, dict): if isinstance(urls, dict):
extract_formats(urls.get('Video')) extract_formats(urls.get('Video'))
captions = asset.get('captions')
if isinstance(captions, list):
for cc in captions:
if not isinstance(cc, dict):
continue
cc_url = cc.get('url')
if not cc_url or not isinstance(cc_url, compat_str):
continue
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
sub_dict = (automatic_captions if cc.get('source') == 'auto'
else subtitles)
sub_dict.setdefault(lang or 'en', []).append({
'url': cc_url,
})
view_html = lecture.get('view_html') view_html = lecture.get('view_html')
if view_html: if view_html:
view_html_urls = set() view_html_urls = set()