[pornhub] Implement lazy playlist extraction

This commit is contained in:
Sergey M․ 2021-02-04 04:36:57 +07:00
parent 2adc0c51cd
commit 89c5a7d5aa
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 11 additions and 10 deletions

View File

@@ -547,13 +547,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
<button[^>]+\bid=["\']moreDataBtn <button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None ''', webpage) is not None
def _real_extract(self, url): def _entries(self, url, host, item_id):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
item_id = mobj.group('id')
self._login(host)
page = self._extract_page(url) page = self._extract_page(url)
VIDEOS = '/videos' VIDEOS = '/videos'
@@ -566,7 +560,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
def is_404(e): def is_404(e):
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404 return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
entries = []
base_url = url base_url = url
has_page = page is not None has_page = page is not None
first_page = page if has_page else 1 first_page = page if has_page else 1
@@ -590,11 +583,19 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host) page_entries = self._extract_entries(webpage, host)
if not page_entries: if not page_entries:
break break
entries.extend(page_entries) for e in page_entries:
yield e
if not self._has_more(webpage): if not self._has_more(webpage):
break break
return self.playlist_result(orderedSet(entries), item_id) def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
item_id = mobj.group('id')
self._login(host)
return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):