[youtube:tab] Extract channels only from channels tab (closes #27266)

This commit is contained in:
Sergey M․ 2020-12-01 04:44:10 +07:00
parent 3e4e338133
commit 132aece1ed
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 13 additions and 4 deletions

View File

@ -2796,12 +2796,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# TODO # TODO
pass pass
def _shelf_entries(self, shelf_renderer): def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get( ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str) compat_str)
shelf_url = urljoin('https://www.youtube.com', ep) shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url: if shelf_url:
# Skipping links to another channels, note that checking for
# endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
# will not work
if skip_channels and '/channels?' in shelf_url:
return
title = try_get( title = try_get(
shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
yield self.url_result(shelf_url, video_title=title) yield self.url_result(shelf_url, video_title=title)
@ -2912,9 +2917,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
} }
def _entries(self, tab, identity_token): def _entries(self, tab, identity_token):
slr_renderer = try_get(tab, lambda x: x['sectionListRenderer'], dict) tab_content = try_get(tab, lambda x: x['content'], dict)
if not tab_content:
return
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
if not slr_renderer: if not slr_renderer:
return return
is_channels_tab = tab.get('title') == 'Channels'
continuation = None continuation = None
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or [] slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
for slr_content in slr_contents: for slr_content in slr_contents:
@ -2941,7 +2950,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
renderer = isr_content.get('shelfRenderer') renderer = isr_content.get('shelfRenderer')
if renderer: if renderer:
for entry in self._shelf_entries(renderer): for entry in self._shelf_entries(renderer, not is_channels_tab):
yield entry yield entry
continue continue
renderer = isr_content.get('backstagePostThreadRenderer') renderer = isr_content.get('backstagePostThreadRenderer')
@ -3071,7 +3080,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
description = None description = None
playlist_id = item_id playlist_id = item_id
playlist = self.playlist_result( playlist = self.playlist_result(
self._entries(selected_tab['content'], identity_token), self._entries(selected_tab, identity_token),
playlist_id=playlist_id, playlist_title=title, playlist_id=playlist_id, playlist_title=title,
playlist_description=description) playlist_description=description)
playlist.update(self._extract_uploader(data)) playlist.update(self._extract_uploader(data))