FileDownloader: Simplify and document

This commit is contained in:
Philipp Hagemeister 2013-05-05 20:49:42 +02:00
parent 891629c84a
commit b338f1b154
1 changed file with 51 additions and 52 deletions

View File

@ -435,47 +435,40 @@ class FileDownloader(object):
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return None return None
def extract_info(self, url, download = True, ie_name = None): def extract_info(self, url, download=True, ie_key=None):
''' '''
Returns a list with a dictionary for each video we find. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos. If 'download', also downloads the videos.
''' '''
suitable_found = False
#We copy the original list if ie_key:
ies = list(self._ies) ie = get_info_extractor(ie_key)()
ie.set_downloader(self)
if ie_name is not None: ies = [ie]
#We put in the first place the given info extractor else:
first_ie = get_info_extractor(ie_name)() ies = self._ies
first_ie.set_downloader(self)
ies.insert(0, first_ie)
for ie in ies: for ie in ies:
# Go to next InfoExtractor if not suitable
if not ie.suitable(url): if not ie.suitable(url):
continue continue
# Warn if the _WORKING attribute is False
if not ie.working(): if not ie.working():
self.report_warning(u'the program functionality for this site has been marked as broken, ' self.report_warning(u'The program functionality for this site has been marked as broken, '
u'and will probably not work. If you want to go on, use the -i option.') u'and will probably not work.')
# Suitable InfoExtractor found
suitable_found = True
# Extract information from URL and process it
try: try:
ie_results = ie.extract(url) ie_result = ie.extract(url)
if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break break
results = [] if isinstance(ie_result, list):
for ie_result in ie_results: # Backwards compatibility: old IE result format
if not 'extractor' in ie_result: ie_result = {
#The extractor has already been set somewhere else '_type': 'compat_list',
ie_result['extractor'] = ie.IE_NAME 'entries': ie_result,
results.append(self.process_ie_result(ie_result, download)) }
return results if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME
return self.process_ie_result(ie_result, download=download)
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(de), de.format_traceback())
break break
@ -485,33 +478,31 @@ class FileDownloader(object):
break break
else: else:
raise raise
if not suitable_found: else:
self.report_error(u'no suitable InfoExtractor: %s' % url) self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download = True): def process_ie_result(self, ie_result, download=True):
""" """
Take the result of the ie and return a list of videos. Take the result of the ie (may be modified) and resolve all unresolved
For url elements it will search the suitable ie and get the videos references (URLs, playlist items).
For playlist elements it will process each of the elements of the 'entries' key
It will also download the videos if 'download'. It will also download the videos if 'download'.
Returns the resolved ie_result.
""" """
result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video': if result_type == 'video':
if 'playlist' not in ie_result: if 'playlist' not in ie_result:
#It isn't part of a playlist # It isn't part of a playlist
ie_result['playlist'] = None ie_result['playlist'] = None
ie_result['playlist_index'] = None ie_result['playlist_index'] = None
if download: if download:
#Do the download:
self.process_info(ie_result) self.process_info(ie_result)
return ie_result return ie_result
elif result_type == 'url': elif result_type == 'url':
#We get the video pointed by the url return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'))
result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
return result
elif result_type == 'playlist': elif result_type == 'playlist':
#We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -533,23 +524,31 @@ class FileDownloader(object):
for i,entry in enumerate(entries,1): for i,entry in enumerate(entries,1):
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
entry_result = self.process_ie_result(entry, False) entry['playlist'] = playlist
entry_result['playlist'] = playlist entry['playlist_index'] = i + playliststart
entry_result['playlist_index'] = i + playliststart entry_result = self.process_ie_result(entry, download=download)
#We must do the download here to correctly set the 'playlist' key
if download:
self.process_info(entry_result)
playlist_results.append(entry_result) playlist_results.append(entry_result)
result = ie_result.copy() ie_result['entries'] = playlist_results
result['entries'] = playlist_results return ie_result
return result elif result_type == 'compat_list':
def _fixup(r):
r.setdefault('extractor', ie_result['extractor'])
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download=download)
for r in ie_result['entries']
]
return ie_result
else:
raise Exception('Invalid result type: %s' % result_type)
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single resolved IE result."""
assert info_dict.get('_type', 'video') == 'video'
#We increment the download count here to match the previous behaviour. #We increment the download count here to match the previous behaviour.
self.increment_downloads() self.increment_downloads()
info_dict['fulltitle'] = info_dict['title'] info_dict['fulltitle'] = info_dict['title']
if len(info_dict['title']) > 200: if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + u'...' info_dict['title'] = info_dict['title'][:197] + u'...'