[youtube] Add support for search result URLs (Fixes #2495)
This commit is contained in:
parent
86fb4347f7
commit
c9ae7b9565
|
@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
def test_youtube_truncated(self):
|
def test_youtube_truncated(self):
|
||||||
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
||||||
|
|
||||||
|
def test_youtube_search_matching(self):
    """Check that YouTube search-result URLs match only 'youtube:search_url'.

    Covers a plain ``search_query`` URL and one where ``search_query`` is
    surrounded by other query parameters.
    """
    search_urls = (
        'http://www.youtube.com/results?search_query=making+mustard',
        'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
    )
    for search_url in search_urls:
        # A fresh expectation list per call, exactly as two separate
        # assertMatch calls would pass.
        self.assertMatch(search_url, ['youtube:search_url'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_justin_tv_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||||
|
|
|
@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertTrue(len(entries) >= 5)
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
|
def test_youtube_search_url(self):
    """Extract a YouTube search-result URL and check the resulting playlist.

    The result must be a playlist whose title is the search query with
    '+' turned into spaces, and it must contain at least five entries.
    """
    search_url = 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video'
    extractor = YoutubeSearchURLIE(FakeYDL())
    playlist = extractor.extract(search_url)
    found = playlist['entries']
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], 'youtube-dl test video')
    self.assertTrue(len(found) >= 5)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -675,7 +675,7 @@ class YoutubeDL(object):
|
||||||
info_dict['playlist'] = None
|
info_dict['playlist'] = None
|
||||||
info_dict['playlist_index'] = None
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
if 'display_id' not in info_dict:
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
# These extractors handle format selection themselves
|
# These extractors handle format selection themselves
|
||||||
|
|
|
@ -285,19 +285,20 @@ from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
|
||||||
YoutubeSearchIE,
|
|
||||||
YoutubeSearchDateIE,
|
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
|
||||||
YoutubeRecommendedIE,
|
|
||||||
YoutubeTruncatedURLIE,
|
|
||||||
YoutubeWatchLaterIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
|
YoutubePlaylistIE,
|
||||||
|
YoutubeRecommendedIE,
|
||||||
|
YoutubeSearchDateIE,
|
||||||
|
YoutubeSearchIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
|
YoutubeShowIE,
|
||||||
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeTruncatedURLIE,
|
||||||
|
YoutubeUserIE,
|
||||||
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
|
|
@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
||||||
_GDATA_PAGE_SIZE = 50
|
_GDATA_PAGE_SIZE = 50
|
||||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||||
|
@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
for video_id in video_ids]
|
for video_id in video_ids]
|
||||||
return self.playlist_result(videos, query)
|
return self.playlist_result(videos, query)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|
||||||
_SEARCH_KEY = 'ytsearchdate'
|
_SEARCH_KEY = 'ytsearchdate'
|
||||||
IE_DESC = u'YouTube.com searches, newest videos first'
|
IE_DESC = u'YouTube.com searches, newest videos first'
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube search-result page URLs.

    Matches ``youtube.com/results?...search_query=<query>...`` and turns the
    result page into a playlist of plain URL entries, one per result heading
    found in the page.
    """
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        """Download the search page at *url* and return its results as a playlist.

        The returned dict has ``_type`` 'playlist', a list of ``entries``
        (each a ``_type`` 'url' dict with ``url`` and ``title``), and a
        ``title`` equal to the URL-decoded search query.
        """
        # The query is percent/plus-decoded; it is used both as the download
        # label and as the playlist title.
        search_terms = compat_urllib_parse.unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        page = self._download_webpage(url, search_terms)
        results_html = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', page, u'result HTML')

        def build_entry(heading_html):
            # Title lookup is passed fatal=False while the URL lookup is not;
            # presumably a missing title is tolerated but a missing link is
            # an error (helper semantics are not visible here).
            title = self._html_search_regex(
                r'(?s)title="([^"]+)"', heading_html, 'item title', fatal=False)
            href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            return {
                '_type': 'url',
                # hrefs are resolved against the site root.
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', href),
                'title': title,
            }

        headings = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', results_html)
        entries = [build_entry(heading_html) for heading_html in headings]

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': search_terms,
        }
|
||||||
|
|
||||||
|
|
||||||
class YoutubeShowIE(InfoExtractor):
|
class YoutubeShowIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com (multi-season) shows'
|
IE_DESC = u'YouTube.com (multi-season) shows'
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
||||||
|
|
Loading…
Reference in New Issue