[pornhub] Fix uploader extraction and extract counts
This commit is contained in:
parent
56dd55721c
commit
0320ddc192
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
|
@ -27,6 +28,12 @@ class PornHubIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_count(self, pattern, webpage, name):
|
||||||
|
count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
|
||||||
|
if count:
|
||||||
|
count = str_to_int(count)
|
||||||
|
return count
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
|
@ -37,11 +44,19 @@ class PornHubIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
|
video_uploader = self._html_search_regex(
|
||||||
|
r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||||
if thumbnail:
|
if thumbnail:
|
||||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||||
|
|
||||||
|
view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||||
|
like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||||
|
dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||||
|
comment_count = self._extract_count(
|
||||||
|
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||||
|
@ -77,6 +92,10 @@ class PornHubIE(InfoExtractor):
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'dislike_count': dislike_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue