[youtube:search] fix view_count and try to extract all video sections (closes #27588) (closes #27604)

This commit is contained in:
Remita Amine 2021-01-11 17:35:13 +01:00
parent d0fc289f45
commit 4759543f6e
1 changed file with 50 additions and 67 deletions

View File

@ -308,6 +308,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) default='{}'), video_id, fatal=False)
def _extract_video(self, renderer):
    """Build a ``url_transparent`` result from a video renderer mapping.

    ``renderer`` is expected to be a dict-like video renderer taken from
    the page/API JSON.  Every field is looked up defensively through
    ``try_get`` so that a missing or differently shaped field does not
    abort extraction of the remaining ones (presumably ``try_get`` yields
    None in that case -- confirm against its definition).

    Returns a dict that delegates the actual extraction to ``YoutubeIE``
    (via ``ie_key``) and carries the metadata that could be read directly
    from the renderer.
    """
    # Use .get() rather than subscripting: the implementation this method
    # replaces tolerated renderers without a 'videoId', and a hard KeyError
    # here would break callers that do not pre-filter their renderers.
    video_id = renderer.get('videoId')
    # The title is read either from the first text run or from a plain
    # 'simpleText' value, whichever lookup succeeds first.
    title = try_get(
        renderer,
        (lambda x: x['title']['runs'][0]['text'],
         lambda x: x['title']['simpleText']), compat_str)
    description = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
        compat_str)
    duration = parse_duration(try_get(
        renderer, lambda x: x['lengthText']['simpleText'], compat_str))
    # Fall back to an empty string so the whitespace stripping below always
    # operates on a string.
    view_count_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    # Only a leading group of digits and commas is taken as the count; when
    # the text does not start with one, the regex default (None) is passed
    # on to str_to_int.
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None))
    uploader = try_get(
        renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
    return {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com' IE_DESC = 'YouTube.com'
@ -2765,36 +2795,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if renderer: if renderer:
return renderer return renderer
def _extract_video(self, renderer):
    """Turn a video renderer mapping into a ``url_transparent`` entry.

    All metadata is read from ``renderer`` through ``try_get``; the entry
    is handed over to ``YoutubeIE`` for the real extraction.
    """
    def text_at(getter):
        # Look up a string value inside the renderer using one getter or a
        # tuple of alternative getters.
        return try_get(renderer, getter, compat_str)

    vid = renderer.get('videoId')

    name = text_at((
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText']))
    snippet = text_at(lambda x: x['descriptionSnippet']['runs'][0]['text'])

    length_text = text_at(lambda x: x['lengthText']['simpleText'])
    seconds = parse_duration(length_text)

    # Normalise the view count text: empty string when absent, then strip
    # every whitespace character before matching the leading number.
    views_text = text_at(lambda x: x['viewCountText']['simpleText'])
    if not views_text:
        views_text = ''
    compact_views = re.sub(r'\s', '', views_text)
    views_match = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    views = str_to_int(views_match)

    channel = text_at(lambda x: x['ownerText']['runs'][0]['text'])

    entry = {
        '_type': 'url_transparent',
        'ie_key': YoutubeIE.ie_key(),
    }
    entry['id'] = vid
    entry['url'] = vid
    entry['title'] = name
    entry['description'] = snippet
    entry['duration'] = seconds
    entry['view_count'] = views
    entry['uploader'] = channel
    return entry
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in grid_renderer['items']:
if not isinstance(item, dict): if not isinstance(item, dict):
@ -3417,46 +3417,29 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
list) list)
if not slr_contents: if not slr_contents:
break break
isr_contents = try_get( for slr_content in slr_contents:
slr_contents, isr_contents = try_get(
lambda x: x[0]['itemSectionRenderer']['contents'], slr_content,
list) lambda x: x['itemSectionRenderer']['contents'],
if not isr_contents: list)
break if not isr_contents:
for content in isr_contents:
if not isinstance(content, dict):
continue continue
video = content.get('videoRenderer') for content in isr_contents:
if not isinstance(video, dict): if not isinstance(content, dict):
continue continue
video_id = video.get('videoId') video = content.get('videoRenderer')
if not video_id: if not isinstance(video, dict):
continue continue
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) video_id = video.get('videoId')
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) if not video_id:
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) continue
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' yield self._extract_video(video)
view_count = int_or_none(self._search_regex( total += 1
r'^(\d+)', re.sub(r'\s', '', view_count_text), if total == n:
'view count', default=None)) return
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
total += 1
yield {
'_type': 'url_transparent',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
'url': video_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'uploader': uploader,
}
if total == n:
return
token = try_get( token = try_get(
slr_contents, slr_contents,
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
compat_str) compat_str)
if not token: if not token:
break break