InfoExtractor: add some helper methods to extract OpenGraph info
This commit is contained in:
parent
d8269e1dfb
commit
46720279c2
|
@ -257,6 +257,30 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
return (username, password)
|
return (username, password)
|
||||||
|
|
||||||
|
# Helper functions for extracting OpenGraph info
|
||||||
|
@staticmethod
def _og_regex(property):
    """Build a regex matching the ``content`` of an OpenGraph ``<meta>`` tag.

    property -- OpenGraph property name without the ``og:`` prefix,
                e.g. ``'title'`` or ``'video:secure_url'``.

    Returns a pattern string with two capture groups: group 1 is set when
    the content attribute is double-quoted, group 2 when it is
    single-quoted.
    """
    # Function-scope import keeps this helper self-contained.
    import re
    # The property name is matched literally, and [^>] (rather than '.')
    # keeps the whole match inside a single tag, so the content of a
    # neighbouring <meta> element can never be picked up for this property.
    return (r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')'
            % re.escape(property))
|
||||||
|
|
||||||
|
def _og_search_property(self, property, html, name=None, **kargs):
    """Look up one OpenGraph property in html.

    property -- OpenGraph property name without the ``og:`` prefix.
    html     -- page source to search.
    name     -- label handed to _html_search_regex; when omitted it is
                derived from the property as 'OpenGraph <property>'.
    kargs    -- forwarded unchanged to _html_search_regex.

    Returns whatever self._html_search_regex returns for the pattern built
    by self._og_regex(property).
    """
    label = ('OpenGraph %s' % property) if name is None else name
    pattern = self._og_regex(property)
    return self._html_search_regex(pattern, html, label, **kargs)
|
||||||
|
|
||||||
|
def _og_search_thumbnail(self, html, **kargs):
    """Search html for the ``og:image`` property, labelled 'thumbnail url'.

    ``fatal`` defaults to False here.  Any keyword arguments, including an
    explicit ``fatal``, are forwarded to _og_search_property.
    """
    # Passing fatal=False next to **kargs raised TypeError (duplicate
    # keyword argument) whenever the caller supplied fatal itself; filling
    # it in as a default keeps the old behaviour and allows an override.
    kargs.setdefault('fatal', False)
    return self._og_search_property('image', html, 'thumbnail url', **kargs)
|
||||||
|
|
||||||
|
def _og_search_description(self, html, **kargs):
    """Search html for the ``og:description`` property.

    ``fatal`` defaults to False here.  Any keyword arguments, including an
    explicit ``fatal``, are forwarded to _og_search_property, which picks
    the display name itself.
    """
    # A hard-coded fatal=False alongside **kargs raised TypeError (duplicate
    # keyword argument) when the caller passed fatal; use it as a default.
    kargs.setdefault('fatal', False)
    return self._og_search_property('description', html, **kargs)
|
||||||
|
|
||||||
|
def _og_search_title(self, html, **kargs):
    """Search html for the ``og:title`` property.

    Keyword arguments are forwarded unchanged to _og_search_property, whose
    result is returned as-is.
    """
    title = self._og_search_property('title', html, **kargs)
    return title
|
||||||
|
|
||||||
|
def _og_search_video_url(self, html, name='video url', **kargs):
    """Search html for the OpenGraph video URL.

    html  -- page source to search.
    name  -- label handed to _html_search_regex.
    kargs -- forwarded unchanged to _html_search_regex.

    The candidate properties are passed to _html_search_regex in this
    order: ``og:video:secure_url``, ``og:video``, ``og:video:url``.
    """
    # og:video:url is defined by the Open Graph protocol as identical to
    # og:video; it is appended last, so it can only add a match on pages
    # where neither earlier pattern is found.
    candidates = ('video:secure_url', 'video', 'video:url')
    patterns = [self._og_regex(prop) for prop in candidates]
    return self._html_search_regex(patterns, html, name, **kargs)
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
Base class for paged search queries extractors.
|
Base class for paged search queries extractors.
|
||||||
|
|
|
@ -34,8 +34,6 @@ class CSpanIE(InfoExtractor):
|
||||||
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
|
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
|
||||||
webpage, 'description',
|
webpage, 'description',
|
||||||
flags=re.MULTILINE|re.DOTALL)
|
flags=re.MULTILINE|re.DOTALL)
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"',
|
|
||||||
webpage, 'thumbnail')
|
|
||||||
|
|
||||||
url = self._search_regex(r'<string name="URL">(.*?)</string>',
|
url = self._search_regex(r'<string name="URL">(.*?)</string>',
|
||||||
video_info, 'video url')
|
video_info, 'video url')
|
||||||
|
@ -49,5 +47,5 @@ class CSpanIE(InfoExtractor):
|
||||||
'url': url,
|
'url': url,
|
||||||
'play_path': path,
|
'play_path': path,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,9 +39,6 @@ class DailymotionIE(InfoExtractor):
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.*?)" />',
|
|
||||||
webpage, 'title')
|
|
||||||
|
|
||||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||||
# Looking for official user
|
# Looking for official user
|
||||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||||
|
@ -76,7 +73,7 @@ class DailymotionIE(InfoExtractor):
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'ext': video_extension,
|
'ext': video_extension,
|
||||||
'thumbnail': info['thumbnail_url']
|
'thumbnail': info['thumbnail_url']
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -28,14 +28,9 @@ class EHowIE(InfoExtractor):
|
||||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
final_url = compat_urllib_parse.unquote(video_url)
|
final_url = compat_urllib_parse.unquote(video_url)
|
||||||
thumbnail_url = self._search_regex(r'<meta property="og:image" content="(.+?)" />',
|
|
||||||
webpage, u'thumbnail URL')
|
|
||||||
uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
|
uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
|
||||||
webpage, u'uploader')
|
webpage, u'uploader')
|
||||||
title = self._search_regex(r'<meta property="og:title" content="(.+?)" />',
|
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||||
webpage, u'Video title').replace(' | eHow', '')
|
|
||||||
description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
|
|
||||||
webpage, u'video description')
|
|
||||||
ext = determine_ext(final_url)
|
ext = determine_ext(final_url)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -44,8 +39,8 @@ class EHowIE(InfoExtractor):
|
||||||
'url': final_url,
|
'url': final_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': description,
|
'description': self._og_search_description(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,11 +36,7 @@ class EscapistIE(InfoExtractor):
|
||||||
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
|
|
||||||
imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
|
playerUrl = self._og_search_video_url(webpage, name='player url')
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
|
|
||||||
webpage, u'player url')
|
|
||||||
|
|
||||||
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
||||||
webpage, u'player url').split(' : ')[-1]
|
webpage, u'player url').split(' : ')[-1]
|
||||||
|
@ -70,7 +66,7 @@ class EscapistIE(InfoExtractor):
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': imgUrl,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': videoDesc,
|
'description': videoDesc,
|
||||||
'player_url': playerUrl,
|
'player_url': playerUrl,
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,21 +47,12 @@ class FlickrIE(InfoExtractor):
|
||||||
raise ExtractorError(u'Unable to extract video url')
|
raise ExtractorError(u'Unable to extract video url')
|
||||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'video title')
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -27,14 +27,11 @@ class FunnyOrDieIE(InfoExtractor):
|
||||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
||||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
|
|
||||||
webpage, u'description', fatal=False, flags=re.DOTALL)
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage, flags=re.DOTALL),
|
||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|
|
@ -33,16 +33,12 @@ class HotNewHipHopIE(InfoExtractor):
|
||||||
|
|
||||||
video_title = self._html_search_regex(r"<title>(.*)</title>",
|
video_title = self._html_search_regex(r"<title>(.*)</title>",
|
||||||
webpage_src, u'title')
|
webpage_src, u'title')
|
||||||
|
|
||||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
|
||||||
thumbnail = self._html_search_regex(r'"og:image" content="(.*)"',
|
|
||||||
webpage_src, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
results = [{
|
results = [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url' : video_url,
|
'url' : video_url,
|
||||||
'title' : video_title,
|
'title' : video_title,
|
||||||
'thumbnail' : thumbnail,
|
'thumbnail' : self._og_search_thumbnail(webpage_src),
|
||||||
'ext' : 'mp3',
|
'ext' : 'mp3',
|
||||||
}]
|
}]
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -18,12 +18,6 @@ class InstagramIE(InfoExtractor):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:video" content="(.+?)"',
|
|
||||||
webpage, u'video URL')
|
|
||||||
thumbnail_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:image" content="(.+?)" />',
|
|
||||||
webpage, u'thumbnail URL', fatal=False)
|
|
||||||
html_title = self._html_search_regex(
|
html_title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>',
|
r'<title>(.+?)</title>',
|
||||||
webpage, u'title', flags=re.DOTALL)
|
webpage, u'title', flags=re.DOTALL)
|
||||||
|
@ -34,9 +28,9 @@ class InstagramIE(InfoExtractor):
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': self._og_search_video_url(webpage),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id
|
'uploader_id' : uploader_id
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -24,8 +24,7 @@ class KeekIE(InfoExtractor):
|
||||||
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
|
video_title = self._og_search_title(webpage)
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, u'uploader', fatal=False)
|
||||||
|
|
|
@ -33,11 +33,9 @@ class LiveLeakIE(InfoExtractor):
|
||||||
video_url = self._search_regex(r'file: "(.*?)",',
|
video_url = self._search_regex(r'file: "(.*?)",',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
|
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||||
webpage, u'title').replace('LiveLeak.com -', '').strip()
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
|
video_description = self._og_search_description(webpage)
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
|
video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, u'uploader', fatal=False)
|
||||||
|
|
|
@ -30,8 +30,7 @@ class NBAIE(InfoExtractor):
|
||||||
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||||
|
|
||||||
shortened_video_id = video_id.rpartition('/')[2]
|
shortened_video_id = video_id.rpartition('/')[2]
|
||||||
title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
|
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||||
webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
|
|
||||||
|
|
||||||
# It isn't there in the HTML it returns to us
|
# It isn't there in the HTML it returns to us
|
||||||
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
|
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
|
||||||
|
|
|
@ -18,12 +18,6 @@ class StatigramIE(InfoExtractor):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:video:secure_url" content="(.+?)">',
|
|
||||||
webpage, u'video URL')
|
|
||||||
thumbnail_url = self._html_search_regex(
|
|
||||||
r'<meta property="og:image" content="(.+?)" />',
|
|
||||||
webpage, u'thumbnail URL', fatal=False)
|
|
||||||
html_title = self._html_search_regex(
|
html_title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>',
|
r'<title>(.+?)</title>',
|
||||||
webpage, u'title')
|
webpage, u'title')
|
||||||
|
@ -34,9 +28,9 @@ class StatigramIE(InfoExtractor):
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': self._og_search_video_url(webpage),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id
|
'uploader_id' : uploader_id
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -30,15 +30,6 @@ class TeamcocoIE(InfoExtractor):
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
|
|
||||||
webpage, u'description', fatal=False)
|
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||||
|
|
||||||
|
@ -49,7 +40,7 @@ class TeamcocoIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': video_description,
|
'description': self._og_search_description(webpage),
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -24,11 +24,8 @@ class TrailerAddictIE(InfoExtractor):
|
||||||
webpage, 'video title').replace(' - Trailer Addict','')
|
webpage, 'video title').replace(' - Trailer Addict','')
|
||||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
view_count = self._search_regex(r'Views: (.+?)<br />',
|
||||||
webpage, 'Views Count')
|
webpage, 'Views Count')
|
||||||
description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
|
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
||||||
webpage, 'video description')
|
|
||||||
video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />',
|
|
||||||
webpage, 'Video id').split('=')[1]
|
|
||||||
|
|
||||||
info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
|
info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
|
||||||
info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
|
info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
|
||||||
|
|
||||||
|
@ -44,6 +41,6 @@ class TrailerAddictIE(InfoExtractor):
|
||||||
'ext' : ext,
|
'ext' : ext,
|
||||||
'title' : title,
|
'title' : title,
|
||||||
'thumbnail' : thumbnail_url,
|
'thumbnail' : thumbnail_url,
|
||||||
'description' : description,
|
'description' : self._og_search_description(webpage),
|
||||||
'view_count' : view_count,
|
'view_count' : view_count,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -22,8 +22,6 @@ class TutvIE(InfoExtractor):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<meta property="og:title" content="(.*?)">', webpage, u'title')
|
|
||||||
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
|
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
|
||||||
|
|
||||||
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
|
||||||
|
@ -36,6 +34,6 @@ class TutvIE(InfoExtractor):
|
||||||
'id': internal_id,
|
'id': internal_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'title': title,
|
'title': self._og_search_title(webpage),
|
||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|
|
@ -27,12 +27,6 @@ class VineIE(InfoExtractor):
|
||||||
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
|
|
||||||
webpage, u'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
|
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
|
@ -40,7 +34,7 @@ class VineIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': video_title,
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}]
|
}]
|
||||||
|
|
Loading…
Reference in New Issue