2017-08-12 16:24:46 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2017-10-14 20:38:34 +00:00
|
|
|
import re
|
|
|
|
|
2017-08-12 16:24:46 +00:00
|
|
|
from .common import InfoExtractor
|
|
|
|
from ..utils import (
|
|
|
|
ExtractorError,
|
|
|
|
int_or_none,
|
|
|
|
float_or_none,
|
2020-12-17 16:21:36 +00:00
|
|
|
try_get,
|
2019-04-26 09:26:51 +00:00
|
|
|
url_or_none,
|
2020-12-22 12:12:14 +00:00
|
|
|
unescapeHTML,
|
2017-08-12 16:24:46 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
class RedditIE(InfoExtractor):
|
|
|
|
_VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
|
|
|
|
_TEST = {
|
|
|
|
# from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
|
|
|
|
'url': 'https://v.redd.it/zv89llsvexdz',
|
2018-02-20 21:12:56 +00:00
|
|
|
'md5': '0a070c53eba7ec4534d95a5a1259e253',
|
2017-08-12 16:24:46 +00:00
|
|
|
'info_dict': {
|
|
|
|
'id': 'zv89llsvexdz',
|
|
|
|
'ext': 'mp4',
|
|
|
|
'title': 'zv89llsvexdz',
|
|
|
|
},
|
|
|
|
'params': {
|
|
|
|
'format': 'bestvideo',
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
video_id = self._match_id(url)
|
|
|
|
|
|
|
|
formats = self._extract_m3u8_formats(
|
|
|
|
'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
|
|
|
|
'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
|
|
|
|
|
|
|
formats.extend(self._extract_mpd_formats(
|
|
|
|
'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
|
|
|
|
mpd_id='dash', fatal=False))
|
|
|
|
|
2017-10-06 18:40:00 +00:00
|
|
|
self._sort_formats(formats)
|
|
|
|
|
2017-08-12 16:24:46 +00:00
|
|
|
return {
|
|
|
|
'id': video_id,
|
|
|
|
'title': video_id,
|
|
|
|
'formats': formats,
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
class RedditRIE(InfoExtractor):
|
2018-05-10 16:17:23 +00:00
|
|
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
2017-08-12 16:24:46 +00:00
|
|
|
_TESTS = [{
|
|
|
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
|
|
|
'info_dict': {
|
|
|
|
'id': 'zv89llsvexdz',
|
|
|
|
'ext': 'mp4',
|
|
|
|
'title': 'That small heart attack.',
|
|
|
|
'thumbnail': r're:^https?://.*\.jpg$',
|
|
|
|
'timestamp': 1501941939,
|
|
|
|
'upload_date': '20170805',
|
|
|
|
'uploader': 'Antw87',
|
2020-12-17 16:21:36 +00:00
|
|
|
'duration': 12,
|
2017-08-12 16:24:46 +00:00
|
|
|
'like_count': int,
|
|
|
|
'dislike_count': int,
|
|
|
|
'comment_count': int,
|
|
|
|
'age_limit': 0,
|
|
|
|
},
|
|
|
|
'params': {
|
|
|
|
'format': 'bestvideo',
|
|
|
|
'skip_download': True,
|
|
|
|
},
|
|
|
|
}, {
|
|
|
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
|
|
|
|
'only_matching': True,
|
|
|
|
}, {
|
|
|
|
# imgur
|
|
|
|
'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
|
|
|
|
'only_matching': True,
|
2018-04-29 14:59:40 +00:00
|
|
|
}, {
|
|
|
|
# imgur @ old reddit
|
|
|
|
'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
|
|
|
|
'only_matching': True,
|
2017-08-12 16:24:46 +00:00
|
|
|
}, {
|
|
|
|
# streamable
|
|
|
|
'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
|
|
|
|
'only_matching': True,
|
|
|
|
}, {
|
|
|
|
# youtube
|
|
|
|
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
|
|
|
|
'only_matching': True,
|
2018-05-10 16:17:23 +00:00
|
|
|
}, {
|
|
|
|
# reddit video @ nm reddit
|
|
|
|
'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
|
|
|
|
'only_matching': True,
|
2017-08-12 16:24:46 +00:00
|
|
|
}]
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
2017-10-14 20:38:34 +00:00
|
|
|
mobj = re.match(self._VALID_URL, url)
|
|
|
|
url, video_id = mobj.group('url', 'id')
|
|
|
|
|
2017-08-12 16:24:46 +00:00
|
|
|
video_id = self._match_id(url)
|
|
|
|
|
|
|
|
data = self._download_json(
|
2017-10-14 20:38:34 +00:00
|
|
|
url + '/.json', video_id)[0]['data']['children'][0]['data']
|
2017-08-12 16:24:46 +00:00
|
|
|
|
|
|
|
video_url = data['url']
|
|
|
|
|
|
|
|
# Avoid recursing into the same reddit URL
|
|
|
|
if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
|
|
|
|
raise ExtractorError('No media found', expected=True)
|
|
|
|
|
|
|
|
over_18 = data.get('over_18')
|
|
|
|
if over_18 is True:
|
|
|
|
age_limit = 18
|
|
|
|
elif over_18 is False:
|
|
|
|
age_limit = 0
|
|
|
|
else:
|
|
|
|
age_limit = None
|
|
|
|
|
2020-12-22 12:12:14 +00:00
|
|
|
thumbnails = []
|
|
|
|
images = try_get(
|
|
|
|
data, lambda x: x['preview']['images'][0]['resolutions']) or []
|
|
|
|
for image in images:
|
|
|
|
url = url_or_none(unescapeHTML(image['url']))
|
|
|
|
if url is not None:
|
|
|
|
thumbnails.append({
|
|
|
|
'url': url,
|
|
|
|
'width': int_or_none(image['width']),
|
|
|
|
'height': int_or_none(image['height']),
|
|
|
|
})
|
|
|
|
|
2017-08-12 16:24:46 +00:00
|
|
|
return {
|
|
|
|
'_type': 'url_transparent',
|
|
|
|
'url': video_url,
|
|
|
|
'title': data.get('title'),
|
2020-12-22 12:12:14 +00:00
|
|
|
'thumbnails': thumbnails,
|
2017-08-12 16:24:46 +00:00
|
|
|
'timestamp': float_or_none(data.get('created_utc')),
|
|
|
|
'uploader': data.get('author'),
|
2020-12-17 16:21:36 +00:00
|
|
|
'duration': int_or_none(try_get(
|
|
|
|
data,
|
|
|
|
(lambda x: x['media']['reddit_video']['duration'],
|
|
|
|
lambda x: x['secure_media']['reddit_video']['duration']))),
|
2017-08-12 16:24:46 +00:00
|
|
|
'like_count': int_or_none(data.get('ups')),
|
|
|
|
'dislike_count': int_or_none(data.get('downs')),
|
|
|
|
'comment_count': int_or_none(data.get('num_comments')),
|
|
|
|
'age_limit': age_limit,
|
|
|
|
}
|