Add basic --download-archive option
Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the meantime. When --download-archive FILE is given, the extractor and ID of every download are recorded in the specified file. If a video is already present in that file, it is skipped.
This commit is contained in:
parent
226113c880
commit
c1c9a79c49
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import errno
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -84,6 +85,9 @@ class YoutubeDL(object):
|
||||||
cachedir: Location of the cache files in the filesystem.
|
cachedir: Location of the cache files in the filesystem.
|
||||||
None to disable filesystem cache.
|
None to disable filesystem cache.
|
||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
|
Videos already present in the file are not downloaded
|
||||||
|
again.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
|
@ -309,6 +313,9 @@ class YoutubeDL(object):
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
if date not in dateRange:
|
if date not in dateRange:
|
||||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||||
|
if self.in_download_archive(info_dict):
|
||||||
|
return (u'%(title)s) has already been recorded in archive'
|
||||||
|
% info_dict)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||||
|
@ -578,6 +585,8 @@ class YoutubeDL(object):
|
||||||
self.report_error(u'postprocessing: %s' % str(err))
|
self.report_error(u'postprocessing: %s' % str(err))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
self.record_download_archive(info_dict)
|
||||||
|
|
||||||
def download(self, url_list):
|
def download(self, url_list):
|
||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
if len(url_list) > 1 and self.fixed_template():
|
if len(url_list) > 1 and self.fixed_template():
|
||||||
|
@ -617,3 +626,26 @@ class YoutubeDL(object):
|
||||||
os.remove(encodeFilename(filename))
|
os.remove(encodeFilename(filename))
|
||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
self.report_warning(u'Unable to remove downloaded video file')
|
self.report_warning(u'Unable to remove downloaded video file')
|
||||||
|
|
||||||
|
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already archived.

    The archive is the file named by the 'download_archive' parameter; each
    of its lines has the form "<extractor> <id>".

    Returns False when no archive is configured, when info_dict lacks the
    'extractor' or 'id' needed to identify the video, or when the archive
    file does not exist yet.  Returns True when a line of the archive
    matches this video.

    Raises IOError for any failure to read the archive other than the file
    being absent.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    extractor = info_dict.get('extractor')
    video_id = info_dict.get('id')
    if extractor is None or video_id is None:
        # Incomplete video information: the video cannot be identified, so
        # it cannot have been recorded.  Report "not archived" rather than
        # failing with a KeyError.
        return False
    vid_id = extractor + u' ' + video_id

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            # strip() drops the trailing newline written with each entry.
            return any(line.strip() == vid_id for line in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
|
||||||
|
|
||||||
|
def record_download_archive(self, info_dict):
    """Append the "<extractor> <id>" line for info_dict to the archive.

    The archive is the file named by the 'download_archive' parameter.
    Nothing is written when that parameter is unset (None).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = u' '.join((info_dict['extractor'], info_dict['id']))
        with locked_file(archive_path, 'a', encoding='utf-8') as archive:
            archive.write(entry + u'\n')
|
||||||
|
|
|
@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
|
||||||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||||
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
||||||
|
selection.add_option('--download-archive', metavar='FILE',
|
||||||
|
dest='download_archive',
|
||||||
|
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
||||||
|
|
||||||
|
|
||||||
authentication.add_option('-u', '--username',
|
authentication.add_option('-u', '--username',
|
||||||
|
@ -631,6 +634,7 @@ def _real_main(argv=None):
|
||||||
'daterange': date,
|
'daterange': date,
|
||||||
'cachedir': opts.cachedir,
|
'cachedir': opts.cachedir,
|
||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
|
'download_archive': opts.download_archive,
|
||||||
})
|
})
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
|
|
|
@ -830,3 +830,99 @@ def get_cachedir(params={}):
|
||||||
cache_root = os.environ.get('XDG_CACHE_HOME',
|
cache_root = os.environ.get('XDG_CACHE_HOME',
|
||||||
os.path.expanduser('~/.cache'))
|
os.path.expanduser('~/.cache'))
|
||||||
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
|
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
|
||||||
|
|
||||||
|
|
||||||
|
# Cross-platform file locking
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
import ctypes.wintypes
|
||||||
|
import msvcrt
|
||||||
|
|
||||||
|
class OVERLAPPED(ctypes.Structure):
|
||||||
|
_fields_ = [
|
||||||
|
('Internal', ctypes.wintypes.LPVOID),
|
||||||
|
('InternalHigh', ctypes.wintypes.LPVOID),
|
||||||
|
('Offset', ctypes.wintypes.DWORD),
|
||||||
|
('OffsetHigh', ctypes.wintypes.DWORD),
|
||||||
|
('hEvent', ctypes.wintypes.HANDLE),
|
||||||
|
]
|
||||||
|
|
||||||
|
kernel32 = ctypes.windll.kernel32
|
||||||
|
LockFileEx = kernel32.LockFileEx
|
||||||
|
LockFileEx.argtypes = [
|
||||||
|
ctypes.wintypes.HANDLE, # hFile
|
||||||
|
ctypes.wintypes.DWORD, # dwFlags
|
||||||
|
ctypes.wintypes.DWORD, # dwReserved
|
||||||
|
ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
|
||||||
|
ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
|
||||||
|
ctypes.POINTER(OVERLAPPED) # Overlapped
|
||||||
|
]
|
||||||
|
LockFileEx.restype = ctypes.wintypes.BOOL
|
||||||
|
UnlockFileEx = kernel32.UnlockFileEx
|
||||||
|
UnlockFileEx.argtypes = [
|
||||||
|
ctypes.wintypes.HANDLE, # hFile
|
||||||
|
ctypes.wintypes.DWORD, # dwReserved
|
||||||
|
ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
|
||||||
|
ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
|
||||||
|
ctypes.POINTER(OVERLAPPED) # Overlapped
|
||||||
|
]
|
||||||
|
UnlockFileEx.restype = ctypes.wintypes.BOOL
|
||||||
|
whole_low = 0xffffffff
|
||||||
|
whole_high = 0x7fffffff
|
||||||
|
|
||||||
|
def _lock_file(f, exclusive):
|
||||||
|
overlapped = OVERLAPPED()
|
||||||
|
overlapped.Offset = 0
|
||||||
|
overlapped.OffsetHigh = 0
|
||||||
|
overlapped.hEvent = 0
|
||||||
|
f._lock_file_overlapped_p = ctypes.pointer(overlapped)
|
||||||
|
handle = msvcrt.get_osfhandle(f.fileno())
|
||||||
|
if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
|
||||||
|
whole_low, whole_high, f._lock_file_overlapped_p):
|
||||||
|
raise OSError('Locking file failed: %r' % ctypes.FormatError())
|
||||||
|
|
||||||
|
def _unlock_file(f):
|
||||||
|
assert f._lock_file_overlapped_p
|
||||||
|
handle = msvcrt.get_osfhandle(f.fileno())
|
||||||
|
if not UnlockFileEx(handle, 0,
|
||||||
|
whole_low, whole_high, f._lock_file_overlapped_p):
|
||||||
|
raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
|
||||||
|
|
||||||
|
else:
|
||||||
|
import fcntl
|
||||||
|
|
||||||
|
def _lock_file(f, exclusive):
|
||||||
|
fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
|
||||||
|
|
||||||
|
def _unlock_file(f):
|
||||||
|
fcntl.lockf(f, fcntl.LOCK_UN)
|
||||||
|
|
||||||
|
|
||||||
|
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened (with io.open) when the object is constructed.
    Entering the ``with`` block locks it via _lock_file: an exclusive lock
    is requested for modes 'a' and 'w', a non-exclusive one for 'r'.
    Leaving the block unlocks and closes the file.

    Iteration, write() and read() are forwarded to the underlying file,
    which is also reachable as the ``f`` attribute.
    """

    def __init__(self, filename, mode, encoding=None):
        """Open filename in mode 'r', 'a' or 'w' with the given encoding."""
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        """Lock the file and return self; close the file if locking fails."""
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The Windows _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so the file opened in __init__ is
            # never leaked when the lock cannot be taken.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        """Unlock the file, then close it even if unlocking raised."""
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        """Iterate over the underlying file."""
        return iter(self.f)

    def write(self, *args):
        """Forward to the underlying file's write()."""
        return self.f.write(*args)

    def read(self, *args):
        """Forward to the underlying file's read()."""
        return self.f.read(*args)
|
||||||
|
|
Loading…
Reference in New Issue