[extractor/common] Improve _form_hidden_inputs and rename to _hidden_inputs
This commit is contained in:
parent 9750e7d70e
commit f8da79f828
|
@ -706,10 +706,17 @@ class InfoExtractor(object):
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _form_hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
return dict(re.findall(
|
return dict([
|
||||||
r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
|
(input.group('name'), input.group('value')) for input in re.finditer(
|
||||||
html))
|
r'''(?x)
|
||||||
|
<input\s+
|
||||||
|
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
|
||||||
|
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
|
||||||
|
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
|
||||||
|
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
|
||||||
|
''', html)
|
||||||
|
])
|
||||||
|
|
||||||
def _sort_formats(self, formats, field_preference=None):
|
def _sort_formats(self, formats, field_preference=None):
|
||||||
if not formats:
|
if not formats:
|
||||||
|
|
|
@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor):
|
||||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
fields = self._form_hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
|
|
||||||
if fields['op'] == 'download1':
|
if fields['op'] == 'download1':
|
||||||
countdown = int_or_none(self._search_regex(
|
countdown = int_or_none(self._search_regex(
|
||||||
|
|
|
@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor):
|
||||||
r'<img src="([^"]+)".+?class="pic"',
|
r'<img src="([^"]+)".+?class="pic"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
fields = self._form_hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
|
|
||||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
|
|
@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor):
|
||||||
if m_error:
|
if m_error:
|
||||||
raise ExtractorError(m_error.group('msg'), expected=True)
|
raise ExtractorError(m_error.group('msg'), expected=True)
|
||||||
|
|
||||||
data = self._form_hidden_inputs(orig_webpage)
|
data = self._hidden_inputs(orig_webpage)
|
||||||
|
|
||||||
self._sleep(2, video_id)
|
self._sleep(2, video_id)
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
|
||||||
if '>File not exist<' in webpage:
|
if '>File not exist<' in webpage:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
fields = self._form_hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
|
|
|
@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor):
|
||||||
raise ExtractorError('Video %s does not exist' % video_id,
|
raise ExtractorError('Video %s does not exist' % video_id,
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
fields = self._form_hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
post = compat_urllib_parse.urlencode(fields)
|
post = compat_urllib_parse.urlencode(fields)
|
||||||
req = compat_urllib_request.Request(url, post)
|
req = compat_urllib_request.Request(url, post)
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
|
|
@ -34,7 +34,7 @@ class SharedIE(InfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s does not exist' % video_id, expected=True)
|
'Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
download_form = self._form_hidden_inputs(webpage)
|
download_form = self._hidden_inputs(webpage)
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
url, compat_urllib_parse.urlencode(download_form))
|
url, compat_urllib_parse.urlencode(download_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
|
|
@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
|
|
||||||
login_form = self._form_hidden_inputs(login_page)
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
login_form.update({
|
login_form.update({
|
||||||
'login': username.encode('utf-8'),
|
'login': username.encode('utf-8'),
|
||||||
|
|
|
@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
if password is None:
|
if password is None:
|
||||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||||
fields = self._form_hidden_inputs(login_form)
|
fields = self._hidden_inputs(login_form)
|
||||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||||
fields['token'] = token
|
fields['token'] = token
|
||||||
fields['password'] = password
|
fields['password'] = password
|
||||||
|
|
|
@ -168,7 +168,7 @@ class VKIE(InfoExtractor):
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
'https://vk.com', None, 'Downloading login page')
|
'https://vk.com', None, 'Downloading login page')
|
||||||
|
|
||||||
login_form = self._form_hidden_inputs(login_page)
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
login_form.update({
|
login_form.update({
|
||||||
'email': username.encode('cp1251'),
|
'email': username.encode('cp1251'),
|
||||||
|
|
|
@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
fields = self._form_hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
|
|
||||||
if fields['op'] == 'download1':
|
if fields['op'] == 'download1':
|
||||||
self._sleep(3, video_id) # they do detect when requests happen too fast!
|
self._sleep(3, video_id) # they do detect when requests happen too fast!
|
||||||
|
|
Loading…
Reference in New Issue