[youtube] Improve xsrf token extraction (closes #27442)
This commit is contained in:
parent
3729c52f9d
commit
942b8ca3be
|
@ -300,6 +300,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
|
def _extract_ytcfg(self, video_id, webpage):
|
||||||
|
return self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||||
|
default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com'
|
IE_DESC = 'YouTube.com'
|
||||||
|
@ -2283,16 +2289,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
# annotations
|
# annotations
|
||||||
video_annotations = None
|
video_annotations = None
|
||||||
if self._downloader.params.get('writeannotations', False):
|
if self._downloader.params.get('writeannotations', False):
|
||||||
xsrf_token = self._search_regex(
|
xsrf_token = None
|
||||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
|
ytcfg = self._extract_ytcfg(video_id, video_webpage)
|
||||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
if ytcfg:
|
||||||
|
xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
|
||||||
|
if not xsrf_token:
|
||||||
|
xsrf_token = self._search_regex(
|
||||||
|
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
|
||||||
|
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||||
invideo_url = try_get(
|
invideo_url = try_get(
|
||||||
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
|
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
|
||||||
if xsrf_token and invideo_url:
|
if xsrf_token and invideo_url:
|
||||||
xsrf_field_name = self._search_regex(
|
xsrf_field_name = None
|
||||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
if ytcfg:
|
||||||
video_webpage, 'xsrf field name',
|
xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
|
||||||
group='xsrf_field_name', default='session_token')
|
if not xsrf_field_name:
|
||||||
|
xsrf_field_name = self._search_regex(
|
||||||
|
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||||
|
video_webpage, 'xsrf field name',
|
||||||
|
group='xsrf_field_name', default='session_token')
|
||||||
video_annotations = self._download_webpage(
|
video_annotations = self._download_webpage(
|
||||||
self._proto_relative_url(invideo_url),
|
self._proto_relative_url(invideo_url),
|
||||||
video_id, note='Downloading annotations',
|
video_id, note='Downloading annotations',
|
||||||
|
@ -3130,10 +3145,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
playlist_title=title)
|
playlist_title=title)
|
||||||
|
|
||||||
def _extract_identity_token(self, webpage, item_id):
|
def _extract_identity_token(self, webpage, item_id):
|
||||||
ytcfg = self._parse_json(
|
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||||
self._search_regex(
|
|
||||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
|
||||||
default='{}'), item_id, fatal=False)
|
|
||||||
if ytcfg:
|
if ytcfg:
|
||||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||||
if token:
|
if token:
|
||||||
|
|
Loading…
Reference in New Issue