[screenwavemedia] Remove extractor

Rewrite TeamFourStar and Normalboots extractors in terms of JWPlatform
2016-11-28 17:17:56 +01:00 · 2016-11-28 17:17:56 +01:00 · 8953319916
parent 51b1378eed
commit 8953319916
5 changed files with 54 additions and 170 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -804,7 +804,6 @@ from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .screenjunkies import ScreenJunkiesIE
 from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
@ -897,6 +896,7 @@ from .teachertube import (
 )
 from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .teamfourstar import TeamFourStarIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tele13 import Tele13IE
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -56,7 +56,6 @@ from .dailymotion import (
 )
 from .onionstudios import OnionStudiosIE
 from .viewlift import ViewLiftEmbedIE
 from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
 from .pladform import PladformIE
 from .videomore import VideomoreIE
@ -1190,16 +1189,6 @@ class GenericIE(InfoExtractor):
                'duration': 248.667,
            },
        },
        # ScreenwaveMedia embed
        {
            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
            'info_dict': {
                'id': 'cinemasnob-55d26273809dd',
                'ext': 'mp4',
                'title': 'cinemasnob',
            },
        },
        # BrightcoveInPageEmbed embed
        {
            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@ -2212,11 +2201,6 @@ class GenericIE(InfoExtractor):
        if jwplatform_url:
            return self.url_result(jwplatform_url, 'JWPlatform')
        # Look for ScreenwaveMedia embeds
        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
        if mobj is not None:
            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
        # Look for Digiteka embeds
        digiteka_url = DigitekaIE._extract_url(webpage)
        if digiteka_url:
--- a/youtube_dl/extractor/normalboots.py
+++ b/youtube_dl/extractor/normalboots.py
@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from .screenwavemedia import ScreenwaveMediaIE
+from .jwplatform import JWPlatformIE
 from ..utils import (
    unified_strdate,
@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor):
            # m3u8 download
            'skip_download': True,
        },
-        'add_ie': ['ScreenwaveMedia'],
+        'add_ie': ['JWPlatform'],
    }
    def _real_extract(self, url):
@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor):
            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
            webpage, 'date', fatal=False))
-        screenwavemedia_url = self._html_search_regex(
+        jwplatform_url = JWPlatformIE._extract_url(webpage)
            ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
            group='url')
        return {
            '_type': 'url_transparent',
            'id': video_id,
-            'url': screenwavemedia_url,
+            'url': jwplatform_url,
-            'ie_key': ScreenwaveMediaIE.ie_key(),
+            'ie_key': JWPlatformIE.ie_key(),
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/youtube_dl/extractor/screenwavemedia.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@ -1,146 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unified_strdate,
    js_to_json,
 )
 class ScreenwaveMediaIE(InfoExtractor):
    """Extractor for player.screenwavemedia.com player URLs."""

    # The ``id`` group captures the value of the ``id=`` query parameter.
    _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
    # Matches a quoted ``src=`` attribute pointing at the player; the ``url``
    # group holds the embed URL (without the surrounding quotes).
    EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
    _TESTS = [{
        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (``id``, ``title``, ``formats``) for *url*.

        Downloads the per-video player page (title and video server) and the
        shared ``player.js`` (list of sources), then turns every source into
        a format entry.
        """
        video_id = self._match_id(url)
        playerdata = self._download_webpage(
            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
            video_id, 'Downloading player webpage')
        # The title is embedded with escaped slashes ("\/"); unescape them.
        vidtitle = self._search_regex(
            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
        playerconfig = self._download_webpage(
            'http://player.screenwavemedia.com/player.js',
            video_id, 'Downloading playerconfig webpage')
        # Note: the server address comes from the player page, not player.js.
        videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')

        # Replace the JavaScript concatenation placeholders inside the
        # ``sources`` array with the concrete server and video id.
        sources_js = self._search_regex(
            r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
            'sources',
        ).replace(
            "' + thisObj.options.videoserver + '",
            videoserver
        ).replace(
            "' + playerVidId + '",
            video_id
        )
        # Strip /* ... */ comments before converting the literal to JSON.
        sources_js = re.sub(r'(?s)/\*.*?\*/', '', sources_js)
        sources = self._parse_json(
            js_to_json(sources_js), video_id, fatal=False)

        # Fallback to hardcoded sources if JS changes again
        if not sources:
            self.report_warning('Falling back to a hardcoded list of streams')
            sources = [{
                'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
                'type': 'mp4',
                'label': format_label,
            } for format_id, format_label in (
                ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
            sources.append({
                'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
                'type': 'hls',
            })

        formats = []
        for source in sources:
            file_ = source.get('file')
            if not file_:
                continue
            if source.get('type') == 'hls':
                formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
            else:
                # The format id is the part between the last "_" group and
                # the file extension, e.g. "..._high.mp4" -> "high".
                format_id = self._search_regex(
                    r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
                if not self._is_valid_url(file_, video_id, format_id or 'video'):
                    continue
                format_label = source.get('label')
                # ``label`` is optional: search an empty string instead of
                # handing ``None`` to the regex search, so a source without
                # a label simply gets no height.
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]', format_label or '', 'height', default=None))
                formats.append({
                    'url': file_,
                    'format_id': format_id,
                    'format': format_label,
                    'ext': source.get('type'),
                    'height': height,
                })
        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        return {
            'id': video_id,
            'title': vidtitle,
            'formats': formats,
        }
 class TeamFourIE(InfoExtractor):
    # Handles teamfourstar.com "/video/<slug>/" pages; the slug is the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
    _TEST = {
        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
        'info_dict': {
            'id': 'TeamFourStar-5292a02f20bfa',
            'ext': 'mp4',
            'upload_date': '20130401',
            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
            'title': 'A Moment With TFS Episode 4',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Scrape page metadata and delegate to the embedded player URL.

        Returns a ``url_transparent`` result whose ``url`` is the Screenwave
        player address found in the page; title, description, upload date and
        thumbnail are taken from the page itself.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Looked up without ``fatal=False``, unlike the optional fields below.
        player_url = self._search_regex(
            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            page, 'player data URL')

        title = self._html_search_regex(
            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
            page, 'title')

        # Date and description are best-effort (``fatal=False``).
        raw_date = self._html_search_regex(
            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
            page, 'date', fatal=False)
        upload_date = unified_strdate(raw_date)

        description = self._html_search_regex(
            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
            page, 'description', fatal=False)

        result = {
            '_type': 'url_transparent',
            'display_id': slug,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': self._og_search_thumbnail(page),
            'url': player_url,
        }
        return result
--- a/youtube_dl/extractor/teamfourstar.py
+++ b/youtube_dl/extractor/teamfourstar.py
@ -0,0 +1,48 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .jwplatform import JWPlatformIE
 from ..utils import unified_strdate
 class TeamFourStarIE(InfoExtractor):
    """Extractor for teamfourstar.com pages that embed a JWPlatform player."""

    # The ``id`` group captures the page slug (used as ``display_id``).
    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?'
    _TEST = {
        'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
        'info_dict': {
            'id': '0WdZO31W',
            'title': 'TFS Abridged Parody Episode 1',
            'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.',
            'ext': 'mp4',
            # NOTE(review): this timestamp and the upload_date below do not
            # describe the same day; presumably they come from different
            # sources (player metadata vs. the page) -- confirm.
            'timestamp': 1394168400,
            'upload_date': '20080508',
        },
    }

    def _real_extract(self, url):
        """Scrape page metadata and delegate to the embedded JWPlatform URL.

        Returns a ``url_transparent`` result: ``url`` is whatever
        ``JWPlatformIE._extract_url`` finds in the page, while title,
        description, upload date and thumbnail are taken from the page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        jwplatform_url = JWPlatformIE._extract_url(webpage)

        video_title = self._html_search_regex(
            r'<h1 class="entry-title">(?P<title>.+?)</h1>',
            webpage, 'title')
        # Date and description are best-effort (``fatal=False``).
        video_date = unified_strdate(self._html_search_regex(
            r'<span class="meta-date date updated">(?P<date>.+?)</span>',
            webpage, 'date', fatal=False))
        video_description = self._html_search_regex(
            r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>',
            webpage, 'description', fatal=False)
        video_thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': jwplatform_url,
            # Name the delegate extractor explicitly, following the same
            # convention as the Normalboots extractor.
            'ie_key': JWPlatformIE.ie_key(),
        }