[ign] fix extraction (closes #24771)

This commit is contained in:
Remita Amine 2021-02-08 15:56:42 +01:00
parent 311ebdd9a5
commit 7f8b8bc418
2 changed files with 200 additions and 175 deletions

View File

@ -470,8 +470,8 @@ from .hungama import (
from .hypem import HypemIE from .hypem import HypemIE
from .ign import ( from .ign import (
IGNIE, IGNIE,
OneUPIE, IGNVideoIE,
PCMagIE, IGNArticleIE,
) )
from .iheart import ( from .iheart import (
IHeartRadioIE, IHeartRadioIE,

View File

@ -3,230 +3,255 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
HEADRequest,
determine_ext,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
strip_or_none,
try_get,
) )
class IGNIE(InfoExtractor): class IGNBaseIE(InfoExtractor):
def _call_api(self, slug):
return self._download_json(
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
class IGNIE(IGNBaseIE):
""" """
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com. Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_PAGE_TYPE = 'video'
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s' _TESTS = [{
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']' 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
'md5': 'd2e1586d9987d40fad7867bf96a018ea',
_TESTS = [ 'info_dict': {
{ 'id': '8f862beef863986b2785559b9e1aa599',
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', 'ext': 'mp4',
'md5': 'febda82c4bafecd2d44b6e1a18a595f8', 'title': 'The Last of Us Review',
'info_dict': { 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
'id': '8f862beef863986b2785559b9e1aa599', 'timestamp': 1370440800,
'ext': 'mp4', 'upload_date': '20130605',
'title': 'The Last of Us Review', 'tags': 'count:9',
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', }
'timestamp': 1370440800, }, {
'upload_date': '20130605', 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'uploader_id': 'cberidon@ign.com', 'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
} 'info_dict': {
}, 'id': 'ee10d774b508c9b8ec07e763b9125b91',
{ 'ext': 'mp4',
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', 'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
'info_dict': { 'description': 'md5:817a20299de610bd56f13175386da6fa',
'id': '100-little-things-in-gta-5-that-will-blow-your-mind', 'timestamp': 1420571160,
}, 'upload_date': '20150106',
'playlist': [ 'tags': 'count:4',
{ }
'info_dict': { }, {
'id': '5ebbd138523268b93c9141af17bec937', 'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
'ext': 'mp4', 'only_matching': True,
'title': 'GTA 5 Video Review', }]
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
'uploader_id': 'danieljkrupa@gmail.com',
},
},
{
'info_dict': {
'id': '638672ee848ae4ff108df2a296418ee2',
'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
'uploader_id': 'togilvie@ign.com',
},
},
],
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'md5': '618fedb9c901fd086f6f093564ef8558',
'info_dict': {
'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
'timestamp': 1408047180,
'upload_date': '20140814',
'uploader_id': 'jamesduggan1990@gmail.com',
},
},
{
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'only_matching': True,
},
{
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
},
{
# videoId pattern
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
'only_matching': True,
},
]
def _find_video_id(self, webpage):
res_id = [
r'"video_id"\s*:\s*"(.*?)"',
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
r'data-video-id="(.+?)"',
r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
]
return self._search_regex(res_id, webpage, 'video id', default=None)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
name_or_id = mobj.group('name_or_id') video = self._call_api(display_id)
page_type = mobj.group('type') video_id = video['videoId']
webpage = self._download_webpage(url, name_or_id) metadata = video['metadata']
if page_type != 'video': title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
multiple_urls = re.findall(
r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
webpage)
if multiple_urls:
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
return {
'_type': 'playlist',
'id': name_or_id,
'entries': entries,
}
video_id = self._find_video_id(webpage)
if not video_id:
return self.url_result(self._search_regex(
self._EMBED_RE, webpage, 'embed url'))
return self._get_video_info(video_id)
def _get_video_info(self, video_id):
api_data = self._download_json(
self._API_URL_TEMPLATE % video_id, video_id)
formats = [] formats = []
m3u8_url = api_data['refs'].get('m3uUrl') refs = video.get('refs') or {}
m3u8_url = refs.get('m3uUrl')
if m3u8_url: if m3u8_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
f4m_url = api_data['refs'].get('f4mUrl')
f4m_url = refs.get('f4mUrl')
if f4m_url: if f4m_url:
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False)) f4m_url, video_id, f4m_id='hds', fatal=False))
for asset in api_data['assets']:
for asset in (video.get('assets') or []):
asset_url = asset.get('url')
if not asset_url:
continue
formats.append({ formats.append({
'url': asset['url'], 'url': asset_url,
'tbr': asset.get('actual_bitrate_kbps'), 'tbr': int_or_none(asset.get('bitrate'), 1000),
'fps': asset.get('frame_rate'), 'fps': int_or_none(asset.get('frame_rate')),
'height': int_or_none(asset.get('height')), 'height': int_or_none(asset.get('height')),
'width': int_or_none(asset.get('width')), 'width': int_or_none(asset.get('width')),
}) })
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
if mezzanine_url:
formats.append({
'ext': determine_ext(mezzanine_url, 'mp4'),
'format_id': 'mezzanine',
'preference': 1,
'url': mezzanine_url,
})
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [{ thumbnails = []
'url': thumbnail['url'] for thumbnail in (video.get('thumbnails') or []):
} for thumbnail in api_data.get('thumbnails', [])] thumbnail_url = thumbnail.get('url')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
})
metadata = api_data['metadata'] tags = []
for tag in (video.get('tags') or []):
display_name = tag.get('displayName')
if not display_name:
continue
tags.append(display_name)
return { return {
'id': api_data.get('videoId') or video_id, 'id': video_id,
'title': metadata.get('longTitle') or metadata.get('name') or metadata.get['title'], 'title': title,
'description': metadata.get('description'), 'description': strip_or_none(metadata.get('description')),
'timestamp': parse_iso8601(metadata.get('publishDate')), 'timestamp': parse_iso8601(metadata.get('publishDate')),
'duration': int_or_none(metadata.get('duration')), 'duration': int_or_none(metadata.get('duration')),
'display_id': metadata.get('slug') or video_id, 'display_id': display_id,
'uploader_id': metadata.get('creator'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': formats,
'tags': tags,
} }
class OneUPIE(IGNIE): class IGNVideoIE(InfoExtractor):
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
IE_NAME = '1up.com'
_TESTS = [{ _TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976.html', 'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'md5': 'c9cc69e07acb675c31a16719f909e347', 'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
'info_dict': { 'info_dict': {
'id': '34976', 'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sniper Elite V2 - Trailer', 'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826', 'description': 'Taking out assassination targets in Hitman has never been more stylish.',
'timestamp': 1313099220, 'timestamp': 1444665600,
'upload_date': '20110811', 'upload_date': '20151012',
'uploader_id': 'IGN',
} }
}, {
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
}, {
# Youtube embed
'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
'only_matching': True,
}, {
# Twitter embed
'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
'only_matching': True,
}, {
# Vimeo embed
'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
result = super(OneUPIE, self)._real_extract(url) req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
result['id'] = mobj.group('name_or_id') url = self._request_webpage(req, video_id).geturl()
return result ign_url = compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
if ign_url:
return self.url_result(ign_url, IGNIE.ie_key())
return self.url_result(url)
class PCMagIE(IGNIE): class IGNArticleIE(IGNBaseIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
IE_NAME = 'pcmag' _PAGE_TYPE = 'article'
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{ _TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
'md5': '212d6154fd0361a2781075f1febbe9ad',
'info_dict': { 'info_dict': {
'id': 'ee10d774b508c9b8ec07e763b9125b91', 'id': '524497489e4e8ff5848ece34',
'ext': 'mp4', 'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?', },
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3', 'playlist': [
'timestamp': 1420571160, {
'upload_date': '20150106', 'info_dict': {
'uploader_id': 'cozzipix@gmail.com', 'id': '5ebbd138523268b93c9141af17bec937',
} 'ext': 'mp4',
'title': 'GTA 5 Video Review',
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
},
},
{
'info_dict': {
'id': '638672ee848ae4ff108df2a296418ee2',
'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
},
},
],
'params': {
'playlist_items': '2-3',
'skip_download': True,
},
}, { }, {
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp', 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'md5': '94130c1ca07ba0adb6088350681f16c1',
'info_dict': { 'info_dict': {
'id': '042e560ba94823d43afcb12ddf7142ca', 'id': '53ee806780a81ec46e0790f8',
'ext': 'mp4', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'title': 'HTC\'s Weird New Re Camera - What\'s New Now', },
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908', 'playlist_count': 2,
'timestamp': 1412953920, }, {
'upload_date': '20141010', # videoId pattern
'uploader_id': 'chris_snyder@pcmag.com', 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
} 'only_matching': True,
}, {
# Youtube embed
'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
'only_matching': True,
}, {
# IMDB embed
'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
'only_matching': True,
}, {
# Facebook embed
'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
'only_matching': True,
}, {
# Brightcove embed
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
'only_matching': True,
}] }]
def _real_extract(self, url):
display_id = self._match_id(url)
article = self._call_api(display_id)
def entries():
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
if media_url:
yield self.url_result(media_url, IGNIE.ie_key())
for content in (article.get('content') or []):
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
yield self.url_result(video_url)
return self.playlist_result(
entries(), article.get('articleId'),
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))