# youtube-dl/youtube_dl/extractor/generic.py
# coding: utf-8

from __future__ import unicode_literals

import os
import re
import sys

from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
    compat_etree_fromstring,
    compat_urllib_parse_unquote,
    compat_urlparse,
    compat_xml_parse_error,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
    HEADRequest,
    is_html,
    orderedSet,
    sanitized_Request,
    smuggle_url,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    UnsupportedError,
    xpath_text,
)
from .brightcove import (
    BrightcoveLegacyIE,
    BrightcoveNewIE,
)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
from .sportbox import SportBoxEmbedIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .vimeo import VimeoIE
from .dailymotion import (
    DailymotionIE,
    DailymotionCloudIE,
)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .videomore import VideomoreIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE


class GenericIE(InfoExtractor):
    IE_DESC = 'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = 'generic'
    _TESTS = [
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
            'info_dict': {
                'id': 'trailer',
                'ext': 'mp4',
                'title': 'trailer',
                'upload_date': '20100513',
            }
        },
        # Direct link to media delivered compressed (until Accept-Encoding is *)
        {
            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
            'md5': '128c42e68b13950268b648275386fc74',
            'info_dict': {
                'id': 'FictionJunction-Parallel_Hearts',
                'ext': 'flac',
                'title': 'FictionJunction-Parallel_Hearts',
                'upload_date': '20140522',
            },
            'expected_warnings': [
                'URL could be a direct video link, returning it as such.'
            ],
            'skip': 'URL invalid',
        },
        # Direct download with broken HEAD
        {
            'url': 'http://ai-radio.org:8000/radio.opus',
            'info_dict': {
                'id': 'radio',
                'ext': 'opus',
                'title': 'radio',
            },
            'params': {
                'skip_download': True,  # infinite live stream
            },
            'expected_warnings': [
                r'501.*Not Implemented',
                r'400.*Bad Request',
            ],
        },
        # Direct link with incorrect MIME type
        {
            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
            'md5': '4ccbebe5f36706d85221f204d7eb5913',
            'info_dict': {
                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
                'id': '5_Lennart_Poettering_-_Systemd',
                'ext': 'webm',
                'title': '5_Lennart_Poettering_-_Systemd',
                'upload_date': '20141120',
            },
            'expected_warnings': [
                'URL could be a direct video link, returning it as such.'
            ]
        },
        # RSS feed
        {
            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
            'info_dict': {
                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
                'title': 'Zero Punctuation',
                'description': 're:.*groundbreaking video review series.*'
            },
            'playlist_mincount': 11,
        },
        # RSS feed with enclosure
        {
            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
            'info_dict': {
                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
                'ext': 'm4v',
                'upload_date': '20150228',
                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
            }
        },
        # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
        {
            'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
            'info_dict': {
                'id': 'smil',
                'ext': 'mp4',
                'title': 'Automatics, robotics and biocybernetics',
                'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
                'upload_date': '20130627',
                'formats': 'mincount:16',
                'subtitles': 'mincount:1',
            },
            'params': {
                'force_generic_extractor': True,
                'skip_download': True,
            },
        },
        # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
        {
            'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
            'info_dict': {
                'id': 'hds',
                'ext': 'flv',
                'title': 'hds',
                'formats': 'mincount:1',
            },
            'params': {
                'skip_download': True,
            },
        },
        # SMIL from https://www.restudy.dk/video/play/id/1637
        {
            'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
            'info_dict': {
                'id': 'video_1637',
                'ext': 'flv',
                'title': 'video_1637',
                'formats': 'mincount:3',
            },
            'params': {
                'skip_download': True,
            },
        },
        # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
        {
            'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
            'info_dict': {
                'id': 'smil-service',
                'ext': 'flv',
                'title': 'smil-service',
                'formats': 'mincount:1',
            },
            'params': {
                'skip_download': True,
            },
        },
        # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
        {
            'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
            'info_dict': {
                'id': '4719370',
                'ext': 'mp4',
                'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
                'formats': 'mincount:3',
            },
            'params': {
                'skip_download': True,
            },
        },
        # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
        {
            'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
            'info_dict': {
                'id': 'mZlp2ctYIUEB',
                'ext': 'mp4',
                'title': 'Tikibad ontruimd wegens brand',
                'description': 'md5:05ca046ff47b931f9b04855015e163a4',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 33,
            },
            'params': {
                'skip_download': True,
            },
        },
        # MPD from http://dash-mse-test.appspot.com/media.html
        {
            'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
            'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
            'info_dict': {
                'id': 'car-20120827-manifest',
                'ext': 'mp4',
                'title': 'car-20120827-manifest',
                'formats': 'mincount:9',
                'upload_date': '20130904',
            },
            'params': {
                'format': 'bestvideo',
            },
        },
        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
        {
            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
            'info_dict': {
                'id': 'content',
                'ext': 'mp4',
                'title': 'content',
                'formats': 'mincount:8',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
            'skip': 'video gone',
        },
        # m3u8 served with Content-Type: text/plain
        {
            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
            'info_dict': {
                'id': 'index',
                'ext': 'mp4',
                'title': 'index',
                'upload_date': '20140720',
                'formats': 'mincount:11',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
            'skip': 'video gone',
        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
            'info_dict': {
                'id': 'cmQHVoWB5FY',
                'ext': 'mp4',
                'upload_date': '20130224',
                'uploader_id': 'TheVerge',
                'description': 're:^Chris Ziegler takes a look at the\.*',
                'uploader': 'The Verge',
                'title': 'First Firefox OS phones side-by-side',
            },
            'params': {
                'skip_download': False,
            }
        },
        {
            # redirect in Refresh HTTP header
            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
            'info_dict': {
                'id': 'pO8h3EaFRdo',
                'ext': 'mp4',
                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
                'upload_date': '20150917',
                'uploader_id': 'brtvofficial',
                'uploader': 'Boiler Room',
            },
            'params': {
                'skip_download': False,
            },
        },
        {
            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
            'info_dict': {
                'id': '13601338388002',
                'ext': 'mp4',
                'uploader': 'www.hodiho.fr',
                'title': 'R\u00e9gis plante sa Jeep',
            }
        },
        # bandcamp page with custom domain
        {
            'add_ie': ['Bandcamp'],
            'url': 'http://bronyrock.com/track/the-pony-mash',
            'info_dict': {
                'id': '3235767654',
                'ext': 'mp3',
                'title': 'The Pony Mash',
                'uploader': 'M_Pallante',
            },
            'skip': 'There is a limit of 200 free downloads / month for the test song',
        },
        # embedded brightcove video
        # it also tests brightcove videos that need to set the 'Referer' in the
        # http requests
        {
            'add_ie': ['BrightcoveLegacy'],
            'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
            'info_dict': {
                'id': '2765128793001',
                'ext': 'mp4',
                'title': 'Le cours de bourse : l’analyse technique',
                'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
                'uploader': 'BFM BUSINESS',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # https://github.com/rg3/youtube-dl/issues/2253
            'url': 'http://bcove.me/i6nfkrc3',
            'md5': '0ba9446db037002366bab3b3eb30c88c',
            'info_dict': {
                'id': '3101154703001',
                'ext': 'mp4',
                'title': 'Still no power',
                'uploader': 'thestar.com',
                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
            },
            'add_ie': ['BrightcoveLegacy'],
            'skip': 'video gone',
        },
        {
            'url': 'http://www.championat.com/video/football/v/87/87499.html',
            'md5': 'fb973ecf6e4a78a67453647444222983',
            'info_dict': {
                'id': '3414141473001',
                'ext': 'mp4',
                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
                'uploader': 'Championat',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/issues/3541
            'add_ie': ['BrightcoveLegacy'],
            'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
            'info_dict': {
                'id': '3866516442001',
                'ext': 'mp4',
                'title': 'Leer mij vrouwen kennen: Aflevering 1',
                'description': 'Leer mij vrouwen kennen: Aflevering 1',
                'uploader': 'SBS Broadcasting',
            },
            'skip': 'Restricted to Netherlands',
            'params': {
                'skip_download': True,  # m3u8 download
            },
        },
        # ooyala video
        {
            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
            'info_dict': {
                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                'ext': 'mp4',
                'title': '2cc213299525360.mov',  # that's what we get
                'duration': 238.231,
            },
            'add_ie': ['Ooyala'],
        },
        {
            # ooyala video embedded with http://player.ooyala.com/iframe.js
            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
            'info_dict': {
                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
                'ext': 'mp4',
                'title': '"Steve Jobs: Man in the Machine" trailer',
                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
                'duration': 135.427,
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'movie expired',
        },
        # embed.ly video
        {
            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
            'info_dict': {
                'id': '9ODmcdjQcHQ',
                'ext': 'mp4',
                'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
                'upload_date': '20140225',
                'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
                'uploader': 'Tested',
                'uploader_id': 'testedcom',
            },
            # No need to test YoutubeIE here
            'params': {
                'skip_download': True,
            },
        },
        # funnyordie embed
        {
            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
            'info_dict': {
                'id': '18e820ec3f',
                'ext': 'mp4',
                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
            },
            # HEAD requests lead to endless 301, while GET is OK
            'expected_warnings': ['301'],
        },
        # RUTV embed
        {
            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
            'info_dict': {
                'id': '776940',
                'ext': 'mp4',
                'title': 'Охотское море стало целиком российским',
                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # TVC embed
        {
            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
            'info_dict': {
                'id': '55304',
                'ext': 'mp4',
                'title': 'Дошкольное воспитание',
            },
        },
        # SportBox embed
        {
            'url': 'http://www.vestifinance.ru/articles/25753',
            'info_dict': {
                'id': '25753',
                'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
            },
            'playlist': [{
                'info_dict': {
                    'id': '370908',
                    'title': 'Госзаказ. День 3',
                    'ext': 'mp4',
                }
            }, {
                'info_dict': {
                    'id': '370905',
                    'title': 'Госзаказ. День 2',
                    'ext': 'mp4',
                }
            }, {
                'info_dict': {
                    'id': '370902',
                    'title': 'Госзаказ. День 1',
                    'ext': 'mp4',
                }
            }],
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # Myvi.ru embed
        {
            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
            'info_dict': {
                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
                'ext': 'mp4',
                'title': 'Ужастики, русский трейлер (2015)',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 153,
            }
        },
        # XHamster embed
        {
            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
            'info_dict': {
                'id': 'showthread',
                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
            },
            'playlist_mincount': 7,
            # This forum no longer allows <iframe> syntax;
            # HTML tags are now displayed as-is
            'skip': 'No videos on this page',
        },
        # Embedded TED video
        {
            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
            'md5': '65fdff94098e4a607385a60c5177c638',
            'info_dict': {
                'id': '1969',
                'ext': 'mp4',
                'title': 'Hidden miracles of the natural world',
                'uploader': 'Louie Schwartzberg',
                'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
            }
        },
        # Embedded Ustream video
        {
            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
            'md5': '27b99cdb639c9b12a79bca876a073417',
            'info_dict': {
                'id': '45734260',
                'ext': 'flv',
                'uploader': 'AU SPA:  The NSA and Privacy',
                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
            }
        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
            'info_dict': {
                'id': '06e53103ca9aa',
                'ext': 'flv',
                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
                'description': 'No description',
            },
        },
        # arte embed
        {
            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
            'md5': '7653032cbb25bf6c80d80f217055fa43',
            'info_dict': {
                'id': '048195-004_PLUS7-F',
                'ext': 'flv',
                'title': 'X:enius',
                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
                'upload_date': '20140320',
            },
            'params': {
                'skip_download': 'Requires rtmpdump'
            },
            'skip': 'video gone',
        },
        # francetv embed
        {
            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
            'info_dict': {
                'id': 'EV_30231',
                'ext': 'mp4',
                'title': 'Alcaline, le concert avec Calogero',
                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
                'upload_date': '20150226',
                'timestamp': 1424989860,
                'duration': 5400,
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
            'expected_warnings': [
                'Forbidden'
            ]
        },
        # Condé Nast embed
        {
            'url': 'http://www.wired.com/2014/04/honda-asimo/',
            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
            'info_dict': {
                'id': '53501be369702d3275860000',
                'ext': 'mp4',
                'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
            }
        },
        # Dailymotion embed
        {
            'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
            'md5': '441aeeb82eb72c422c7f14ec533999cd',
            'info_dict': {
                'id': 'k2mm4bCdJ6CQ2i7c8o2',
                'ext': 'mp4',
                'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
                'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
                'uploader': 'Spi0n',
                'uploader_id': 'xgditw',
                'upload_date': '20140425',
                'timestamp': 1398441542,
            },
            'add_ie': ['Dailymotion'],
        },
        # YouTube embed
        {
            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
            'info_dict': {
                'id': 'FXRb4ykk4S0',
                'ext': 'mp4',
                'title': 'The NBL Auction 2014',
                'uploader': 'BADMINTON England',
                'uploader_id': 'BADMINTONEvents',
                'upload_date': '20140603',
                'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
            },
            'add_ie': ['Youtube'],
            'params': {
                'skip_download': True,
            }
        },
        # MTVServices embed
        {
            'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
            'md5': 'ca1aef97695ef2c1d6973256a57e5252',
            'info_dict': {
                'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
                'ext': 'mp4',
                'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
                'description': 'Two valets share their love for movie star Liam Neesons.',
                'timestamp': 1349922600,
                'upload_date': '20121011',
            },
        },
        # YouTube embed via <data-embed-url="">
        {
            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
            'info_dict': {
                'id': '4vAffPZIT44',
                'ext': 'mp4',
                'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
                'uploader': 'Gameloft',
                'uploader_id': 'gameloft',
                'upload_date': '20140828',
                'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
            },
            'params': {
                'skip_download': True,
            }
        },
        # Camtasia studio
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                }
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                }
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            }
        },
        # Flowplayer
        {
            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
            'md5': '9d65602bf31c6e20014319c7d07fba27',
            'info_dict': {
                'id': '5123ea6d5e5a7',
                'ext': 'mp4',
                'age_limit': 18,
                'uploader': 'www.handjobhub.com',
                'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
            }
        },
        # Multiple brightcove videos
        # https://github.com/rg3/youtube-dl/issues/2283
        {
            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
            'info_dict': {
                'id': 'always-never',
                'title': 'Always / Never - The New Yorker',
            },
            'playlist_count': 3,
            'params': {
                'extract_flat': False,
                'skip_download': True,
            }
        },
        # MLB embed
        {
            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
            'md5': '96f09a37e44da40dd083e12d9a683327',
            'info_dict': {
                'id': '33322633',
                'ext': 'mp4',
                'title': 'Ump changes call to ball',
                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
                'duration': 48,
                'timestamp': 1401537900,
                'upload_date': '20140531',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        # Wistia embed
        {
            'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
            'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
            'info_dict': {
                'id': '6e2wtrbdaf',
                'ext': 'mov',
                'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
                'description': 'a Paywall Videos video from Remilon',
                'duration': 644.072,
                'uploader': 'study.com',
                'timestamp': 1459678540,
                'upload_date': '20160403',
                'filesize': 24687186,
            },
        },
        {
            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
            'info_dict': {
                'id': 'uxjb0lwrcz',
                'ext': 'mp4',
                'title': 'Conversation about Hexagonal Rails Part 1',
                'description': 'a Martin Fowler video from ThoughtWorks',
                'duration': 1715.0,
                'uploader': 'thoughtworks.wistia.com',
                'timestamp': 1401832161,
                'upload_date': '20140603',
            },
        },
        # Wistia standard embed (async)
        {
            'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
            'info_dict': {
                'id': '807fafadvk',
                'ext': 'mp4',
                'title': 'Drip Brennan Dunn Workshop',
                'description': 'a JV Webinars video from getdrip-1',
                'duration': 4986.95,
                'timestamp': 1463607249,
                'upload_date': '20160518',
            },
            'params': {
                'skip_download': True,
            }
        },
        # Soundcloud embed
        {
            'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
            'info_dict': {
                'id': '174391317',
                'ext': 'mp3',
                'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
                'uploader': 'Sophos Security',
                'title': 'Chet Chat 171 - Oct 29, 2014',
                'upload_date': '20141029',
            }
        },
        # Soundcloud multiple embeds
        {
            'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
            'info_dict': {
                'id': '52809',
                'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
            },
            'playlist_mincount': 7,
        },
        # Livestream embed
        {
            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
            'info_dict': {
                'id': '67864563',
                'ext': 'flv',
                'upload_date': '20141112',
                'title': 'Rosetta #CometLanding webcast HL 10',
            }
        },
        # Another Livestream embed, without 'new.' in URL
        {
            'url': 'https://www.freespeech.org/',
            'info_dict': {
                'id': '123537347',
                'ext': 'mp4',
                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            },
            'params': {
                # Live stream
                'skip_download': True,
            },
        },
        # LazyYT
        {
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
            'info_dict': {
                'id': '1986',
                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
            },
            'playlist_mincount': 2,
        },
        # Cinchcast embed
        {
            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
            'info_dict': {
                'id': '7141703',
                'ext': 'mp3',
                'upload_date': '20141126',
                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
            }
        },
        # Cinerama player
        {
            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
            'info_dict': {
                'id': '730m_DandD_1901_512k',
                'ext': 'mp4',
                'uploader': 'www.abc.net.au',
                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
            }
        },
        # embedded viddler video
        {
            'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
            'info_dict': {
                'id': '4d03aad9',
                'ext': 'mp4',
                'uploader': 'deadspin',
                'title': 'WALL-TO-GORTAT',
                'timestamp': 1422285291,
                'upload_date': '20150126',
            },
            'add_ie': ['Viddler'],
        },
        # Libsyn embed
        {
            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
            'info_dict': {
                'id': '3377616',
                'ext': 'mp3',
                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
                'description': 'md5:601cb790edd05908957dae8aaa866465',
                'upload_date': '20150220',
            },
            'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
        },
        # jwplayer YouTube
        {
            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
            'info_dict': {
                'id': 'Mrj4DVp2zeA',
                'ext': 'mp4',
                'upload_date': '20150212',
                'uploader': 'The National Archives UK',
                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                'uploader_id': 'NationalArchives08',
                'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
            },
        },
        # rtl.nl embed
        {
            'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'aanslagen-kopenhagen',
                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
            }
        },
        # Zapiks embed
        {
            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
            'info_dict': {
                'id': '118046',
                'ext': 'mp4',
                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
            }
        },
        # Kaltura embed (different embed code)
        {
            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
            'info_dict': {
                'id': '1_a52wc67y',
                'ext': 'flv',
                'upload_date': '20150127',
                'uploader_id': 'PremierMedia',
                'timestamp': int,
                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
            },
        },
        # Kaltura embed protected with referrer
        {
            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
            'info_dict': {
                'id': '1_g4fbemnq',
                'ext': 'mp4',
                'title': 'Violetta - Achter De Schermen - Ruggero',
                'description': 'Achter de schermen met Ruggero',
                'timestamp': 1435133761,
                'upload_date': '20150624',
                'uploader_id': 'echojecka',
            },
        },
        # Kaltura embed with single quotes
        {
            'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
            'info_dict': {
                'id': '0_izeg5utt',
                'ext': 'mp4',
                'title': '35871',
                'timestamp': 1355743100,
                'upload_date': '20121217',
                'uploader_id': 'batchUser',
            },
            'add_ie': ['Kaltura'],
        },
        {
            # Kaltura embedded via quoted entry_id
            'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
            'info_dict': {
                'id': '0_utuok90b',
                'ext': 'mp4',
                'title': '06_matthew_brender_raj_dutt',
                'timestamp': 1466638791,
                'upload_date': '20160622',
            },
            'add_ie': ['Kaltura'],
            'expected_warnings': [
                'Could not send HEAD request'
            ],
            'params': {
                'skip_download': True,
            }
        },
        # Eagle.Platform embed (generic URL)
        {
            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
            'info_dict': {
                'id': '227304',
                'ext': 'mp4',
                'title': 'Навальный вышел на свободу',
                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 87,
                'view_count': int,
                'age_limit': 0,
            },
        },
        # ClipYou (Eagle.Platform) embed (custom URL)
        {
            'url': 'http://muz-tv.ru/play/7129/',
            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
            'info_dict': {
                'id': '12820',
                'ext': 'mp4',
                'title': "'O Sole Mio",
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 216,
                'view_count': int,
            },
        },
        # Pladform embed
        {
            'url': 'http://muz-tv.ru/kinozal/view/7400/',
            'info_dict': {
                'id': '100183293',
                'ext': 'mp4',
                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 694,
                'age_limit': 0,
            },
        },
        # Playwire embed
        {
            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
            'info_dict': {
                'id': '3519514',
                'ext': 'mp4',
                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
                'thumbnail': 're:^https?://.*\.png$',
                'duration': 45.115,
            },
        },
        # 5min embed
        {
            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
            'info_dict': {
                'id': '518726732',
                'ext': 'mp4',
                'title': 'Facebook Creates "On This Day" | Crunch Report',
            },
        },
        # SVT embed
        {
            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
            'info_dict': {
                'id': '2900353',
                'ext': 'flv',
                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
                'duration': 27,
                'age_limit': 0,
            },
        },
        # Crooks and Liars embed
        {
            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
            'info_dict': {
                'id': '8RUoRhRi',
                'ext': 'mp4',
                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
                'timestamp': 1428207000,
                'upload_date': '20150405',
                'uploader': 'Heather',
            },
        },
        # Crooks and Liars external embed
        {
            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
            'info_dict': {
                'id': 'MTE3MjUtMzQ2MzA',
                'ext': 'mp4',
                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
                'timestamp': 1265032391,
                'upload_date': '20100201',
                'uploader': 'Heather',
            },
        },
        # NBC Sports vplayer embed
        {
            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
            'info_dict': {
                'id': 'ln7x1qSThw4k',
                'ext': 'flv',
                'title': "PFT Live: New leader in the 'new-look' defense",
                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
                'uploader': 'NBCU-SPORTS',
                'upload_date': '20140107',
                'timestamp': 1389118457,
            },
        },
        # NBC News embed
        {
            'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
            'md5': '1aa589c675898ae6d37a17913cf68d66',
            'info_dict': {
                'id': '701714499682',
                'ext': 'mp4',
                'title': 'PREVIEW: On Assignment: David Letterman',
                'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
            },
        },
        # UDN embed
        {
            'url': 'https://video.udn.com/news/300346',
            'md5': 'fd2060e988c326991037b9aff9df21a6',
            'info_dict': {
                'id': '300346',
                'ext': 'mp4',
                'title': '中一中男師變性 全校師生力挺',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # Ooyala embed
        {
            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
            'info_dict': {
                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
                'ext': 'mp4',
                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
                'title': 'This is what separates the Excel masters from the wannabes',
                'duration': 191.933,
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            }
        },
        # Brightcove URL in single quotes
        {
            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
            'md5': '4ae374f1f8b91c889c4b9203c8c752af',
            'info_dict': {
                'id': '4255764656001',
                'ext': 'mp4',
                'title': 'SN Presents: Russell Martin, World Citizen',
                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                'uploader': 'Rogers Sportsnet',
                'uploader_id': '1704050871',
                'upload_date': '20150525',
                'timestamp': 1432570283,
            },
        },
        # Dailymotion Cloud video
        {
            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
            'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
            'info_dict': {
                'id': 'x2uy8t3',
                'ext': 'mp4',
                'title': 'Sauvons les abeilles ! - Le débat',
                'description': 'md5:d9082128b1c5277987825d684939ca26',
                'thumbnail': 're:^https?://.*\.jpe?g$',
                'timestamp': 1434970506,
                'upload_date': '20150622',
                'uploader': 'Public Sénat',
                'uploader_id': 'xa9gza',
            }
        },
        # OnionStudios embed
        {
            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
            'info_dict': {
                'id': '2855',
                'ext': 'mp4',
                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
                'thumbnail': 're:^https?://.*\.jpe?g$',
                'uploader': 'ClickHole',
                'uploader_id': 'clickhole',
            }
        },
        # SnagFilms embed
        {
            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
            'info_dict': {
                'id': '74849a00-85a9-11e1-9660-123139220831',
                'ext': 'mp4',
                'title': '#whilewewatch',
            }
        },
        # AdobeTVVideo embed
        {
            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
            'md5': '43662b577c018ad707a63766462b1e87',
            'info_dict': {
                'id': '2456',
                'ext': 'mp4',
                'title': 'New experience with Acrobat DC',
                'description': 'New experience with Acrobat DC',
                'duration': 248.667,
            },
        },
        # ScreenwaveMedia embed
        {
            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
            'info_dict': {
                'id': 'cinemasnob-55d26273809dd',
                'ext': 'mp4',
                'title': 'cinemasnob',
            },
        },
        # BrightcoveInPageEmbed embed
        {
            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
            'info_dict': {
                'id': '4238694884001',
                'ext': 'flv',
                'title': 'Tabletop: Dread, Last Thoughts',
                'description': 'Tabletop: Dread, Last Thoughts',
                'duration': 51690,
            },
        },
        # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
        # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
        {
            'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
            'info_dict': {
                'id': '4785848093001',
                'ext': 'mp4',
                'title': 'The Cardinal Pell Interview',
                'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
                'uploader': 'GlobeCast Australia - GlobeStream',
                'uploader_id': '2733773828001',
                'upload_date': '20160304',
                'timestamp': 1457083087,
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
        },
        # Another form of arte.tv embed
        {
            'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
            'md5': '850bfe45417ddf221288c88a0cffe2e2',
            'info_dict': {
                'id': '030273-562_PLUS7-F',
                'ext': 'mp4',
                'title': 'ARTE Reportage - Nulle part, en France',
                'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
                'upload_date': '20160409',
            },
        },
        # LiveLeak embed
        {
            'url': 'http://www.wykop.pl/link/3088787/',
            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
            'info_dict': {
                'id': '874_1459135191',
                'ext': 'mp4',
                'title': 'Man shows poor quality of new apartment building',
                'description': 'The wall is like a sand pile.',
                'uploader': 'Lake8737',
            }
        },
        # Duplicated embedded video URLs
        {
            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
            'info_dict': {
                'id': '149298443_480_16c25b74_2',
                'ext': 'mp4',
                'title': 'vs. Blue Orange Spring Game',
                'uploader': 'www.hudl.com',
            },
        },
        # twitter:player:stream embed
        {
            'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
            'info_dict': {
                'id': 'master',
                'ext': 'mp4',
                'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
                'uploader': 'www.rtl.be',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
        },
        # twitter:player embed
        {
            'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
            'md5': 'a3e0df96369831de324f0778e126653c',
            'info_dict': {
                'id': '4909620399001',
                'ext': 'mp4',
                'title': 'What Do Black Holes Sound Like?',
                'description': 'what do black holes sound like',
                'upload_date': '20160524',
                'uploader_id': '29913724001',
                'timestamp': 1464107587,
                'uploader': 'TheAtlantic',
            },
            'add_ie': ['BrightcoveLegacy'],
        },
        # Facebook <iframe> embed
        {
            'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
            'md5': 'fbcde74f534176ecb015849146dd3aee',
            'info_dict': {
                'id': '599637780109885',
                'ext': 'mp4',
                'title': 'Facebook video #599637780109885',
            },
        },
        # Facebook API embed
        {
            'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
            'md5': 'a47372ee61b39a7b90287094d447d94e',
            'info_dict': {
                'id': '10153467542406923',
                'ext': 'mp4',
                'title': 'Facebook video #10153467542406923',
            },
        },
        # Wordpress "YouTube Video Importer" plugin
        {
            'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
            'md5': 'd16797741b560b485194eddda8121b48',
            'info_dict': {
                'id': 'HNTXWDXV9Is',
                'ext': 'mp4',
                'title': 'Blue Devils Drumline Stanford lot 2016',
                'upload_date': '20160627',
                'uploader_id': 'GENOCIDE8GENERAL10',
                'uploader': 'cylus cyrus',
            },
        },
        {
            # video stored on custom kaltura server
            'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
            'md5': '537617d06e64dfed891fa1593c4b30cc',
            'info_dict': {
                'id': '0_1iotm5bh',
                'ext': 'mp4',
                'title': 'Elecciones británicas: 5 lecciones para Rajoy',
                'description': 'md5:435a89d68b9760b92ce67ed227055f16',
                'uploader_id': 'videos.expansion@el-mundo.net',
                'upload_date': '20150429',
                'timestamp': 1430303472,
            },
            'add_ie': ['Kaltura'],
        },
        {
            # Non-standard Vimeo embed
            'url': 'https://openclassrooms.com/courses/understanding-the-web',
            'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
            'info_dict': {
                'id': '148867247',
                'ext': 'mp4',
                'title': 'Understanding the web - Teaser',
                'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
                'upload_date': '20151214',
                'uploader': 'OpenClassrooms',
                'uploader_id': 'openclassrooms',
            },
            'add_ie': ['Vimeo'],
        },
        {
            # generic vimeo embed that requires original URL passed as Referer
            'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
            'only_matching': True,
        },
        {
            'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
            'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
            'info_dict': {
                'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
                'ext': 'mp4',
                'title': 'Big Buck Bunny',
                'description': 'Royalty free test video',
                'timestamp': 1432816365,
                'upload_date': '20150528',
                'is_live': False,
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': [ArkenaIE.ie_key()],
        },
        {
            'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
            'info_dict': {
                'id': '1c7141f46c',
                'ext': 'mp4',
                'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': [Vbox7IE.ie_key()],
        },
        {
            # DBTV embeds
            'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
            'info_dict': {
                'id': '43254897',
                'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
            },
            'playlist_mincount': 3,
        },
        # {
        #     # TODO: find another test
        #     # http://schema.org/VideoObject
        #     'url': 'https://flipagram.com/f/nyvTSJMKId',
        #     'md5': '888dcf08b7ea671381f00fab74692755',
        #     'info_dict': {
        #         'id': 'nyvTSJMKId',
        #         'ext': 'mp4',
        #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
        #         'description': '#love for cats.',
        #         'timestamp': 1461244995,
        #         'upload_date': '20160421',
        #     },
        #     'params': {
        #         'force_generic_extractor': True,
        #     },
        # }
    ]

    def report_following_redirect(self, new_url):
        """Print a '[redirect]' notice naming the URL that is followed next."""
        notice = '[redirect] Following redirect to %s' % new_url
        self._downloader.to_screen(notice)

    def _extract_rss(self, url, video_id, doc):
        playlist_title = doc.find('./channel/title').text
        playlist_desc_el = doc.find('./channel/description')
        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text

        entries = []
        for it in doc.findall('./channel/item'):
            next_url = xpath_text(it, 'link', fatal=False)
            if not next_url:
                enclosure_nodes = it.findall('./enclosure')
                for e in enclosure_nodes:
                    next_url = e.attrib.get('url')
                    if next_url:
                        break

            if not next_url:
                continue

            entries.append({
                '_type': 'url',
                'url': next_url,
                'title': it.find('title').text,
            })

        return {
            '_type': 'playlist',
            'id': url,
            'title': playlist_title,
            'description': playlist_desc,
            'entries': entries,
        }

    def _extract_camtasia(self, url, video_id, webpage):
        """ Returns None if no camtasia video can be found. """

        camtasia_cfg = self._search_regex(
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
            webpage, 'camtasia configuration file', default=None)
        if camtasia_cfg is None:
            return None

        title = self._html_search_meta('DC.title', webpage, fatal=True)

        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
        camtasia_cfg = self._download_xml(
            camtasia_url, video_id,
            note='Downloading camtasia configuration',
            errnote='Failed to download camtasia configuration')
        fileset_node = camtasia_cfg.find('./playlist/array/fileset')

        entries = []
        for n in fileset_node.getchildren():
            url_n = n.find('./uri')
            if url_n is None:
                continue

            entries.append({
                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
                'title': '%s - %s' % (title, n.tag),
                'url': compat_urlparse.urljoin(url, url_n.text),
                'duration': float_or_none(n.find('./duration').text),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
        }

    def _real_extract(self, url):
        if url.startswith('//'):
            return {
                '_type': 'url',
                'url': self.http_scheme() + url,
            }

        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
                default_search = 'fixup_error'

            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                elif default_search != 'fixup_error':
                    if default_search == 'auto_warning':
                        if re.match(r'^(?:url|URL)$', url):
                            raise ExtractorError(
                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
                                expected=True)
                        else:
                            self._downloader.report_warning(
                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)

            if default_search in ('error', 'fixup_error'):
                raise ExtractorError(
                    '%r is not a valid URL. '
                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                    % (url, url), expected=True)
            else:
                if ':' not in default_search:
                    default_search += ':'
                return self.url_result(default_search + url)

        url, smuggled_data = unsmuggle_url(url)
        force_videoid = None
        is_intentional = smuggled_data and smuggled_data.get('to_generic')
        if smuggled_data and 'force_videoid' in smuggled_data:
            force_videoid = smuggled_data['force_videoid']
            video_id = force_videoid
        else:
            video_id = self._generic_id(url)

        self.to_screen('%s: Requesting header' % video_id)

        head_req = HEADRequest(url)
        head_response = self._request_webpage(
            head_req, video_id,
            note=False, errnote='Could not send HEAD request to %s' % url,
            fatal=False)

        if head_response is not False:
            # Check for redirect
            new_url = head_response.geturl()
            if url != new_url:
                self.report_following_redirect(new_url)
                if force_videoid:
                    new_url = smuggle_url(
                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)

        full_response = None
        if head_response is False:
            request = sanitized_Request(url)
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)
            head_response = full_response

        info_dict = {
            'id': video_id,
            'title': self._generic_title(url),
            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
        }

        # Check for direct link to a video
        content_type = head_response.headers.get('Content-Type', '').lower()
        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
        if m:
            format_id = m.group('format_id')
            if format_id.endswith('mpegurl'):
                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
            elif format_id == 'f4m':
                formats = self._extract_f4m_formats(url, video_id)
            else:
                formats = [{
                    'format_id': m.group('format_id'),
                    'url': url,
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }]
                info_dict['direct'] = True
            self._sort_formats(formats)
            info_dict['formats'] = formats
            return info_dict

        if not self._downloader.params.get('test', False) and not is_intentional:
            force = self._downloader.params.get('force_generic_extractor', False)
            self._downloader.report_warning(
                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))

        if not full_response:
            request = sanitized_Request(url)
            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
            # making it impossible to download only a chunk of the file (yet we need only the first
            # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
            # this would always result in downloading the whole file, which is not desirable.
            # Therefore, for the extraction pass we have to override Accept-Encoding to accept any
            # encoding in order to receive raw bytes and be able to download only a chunk.
            # It would probably be better to solve this by checking Content-Type for application/octet-stream
            # after the HEAD request finishes, but we are not sure we can rely on this.
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)

        first_bytes = full_response.read(512)

        # Is it an M3U playlist?
        if first_bytes.startswith(b'#EXTM3U'):
            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
            self._sort_formats(info_dict['formats'])
            return info_dict

        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
        if not is_html(first_bytes):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
            info_dict.update({
                'direct': True,
                'url': url,
            })
            return info_dict

        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)

        self.report_extraction(video_id)

        # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
        try:
            doc = compat_etree_fromstring(webpage.encode('utf-8'))
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
            elif doc.tag == 'SmoothStreamingMedia':
                info_dict['formats'] = self._parse_ism_formats(doc, url)
                self._sort_formats(info_dict['formats'])
                return info_dict
            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
                smil = self._parse_smil(doc, url, video_id)
                self._sort_formats(smil['formats'])
                return smil
            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                info_dict['formats'] = self._parse_mpd_formats(
                    doc, video_id,
                    mpd_base_url=full_response.geturl().rpartition('/')[0],
                    mpd_url=url)
                self._sort_formats(info_dict['formats'])
                return info_dict
            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
                self._sort_formats(info_dict['formats'])
                return info_dict
        except compat_xml_parse_error:
            pass

        # Is it a Camtasia project?
        camtasia_res = self._extract_camtasia(url, video_id, webpage)
        if camtasia_res is not None:
            return camtasia_res

        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows to handle those cases in a generic way
        webpage = compat_urllib_parse_unquote(webpage)

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
            default='video')

        # Try to detect age limit automatically
        age_limit = self._rta_search(webpage)
        # And then there are the jokers who advertise that they use RTA,
        # but actually don't.
        AGE_LIMIT_MARKERS = [
            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
        ]
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
            age_limit = 18

        # video uploader is domain name
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

        video_description = self._og_search_description(webpage, default=None)
        video_thumbnail = self._og_search_thumbnail(webpage, default=None)

        # Helper method
        def _playlist_from_matches(matches, getter=None, ie=None):
            """Build a playlist result for this page from a sequence of embed matches.

            matches -- iterable of raw matches (URLs or regex match tuples)
            getter  -- optional callable applied to each match to obtain its URL;
                       when omitted the match itself is used as the URL
            ie      -- optional extractor key forwarded to url_result

            Every URL goes through self._proto_relative_url before being wrapped
            with self.url_result; the resulting entries are passed through
            orderedSet (presumably dropping duplicates while keeping order).
            The playlist reuses the enclosing video_id and video_title.
            """
            def _entry_for(match):
                embed_url = getter(match) if getter else match
                return self.url_result(self._proto_relative_url(embed_url), ie)

            playlist_entries = orderedSet(_entry_for(match) for match in matches)
            return self.playlist_result(
                playlist_entries, playlist_id=video_id, playlist_title=video_title)

        # Look for Brightcove Legacy Studio embeds
        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
        if bc_urls:
            self.to_screen('Brightcove video detected.')
            entries = [{
                '_type': 'url',
                'url': smuggle_url(bc_url, {'Referer': url}),
                'ie_key': 'BrightcoveLegacy'
            } for bc_url in bc_urls]

            return {
                '_type': 'playlist',
                'title': video_title,
                'id': video_id,
                'entries': entries,
            }

        # Look for Brightcove New Studio embeds
        bc_urls = BrightcoveNewIE._extract_urls(webpage)
        if bc_urls:
            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')

        # Look for ThePlatform embeds
        tp_urls = ThePlatformIE._extract_urls(webpage)
        if tp_urls:
            return _playlist_from_matches(tp_urls, ie='ThePlatform')

        # Look for Vessel embeds
        vessel_urls = VesselIE._extract_urls(webpage)
        if vessel_urls:
            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())

        # Look for embedded rtl.nl player
        matches = re.findall(
            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
            webpage)
        if matches:
            return _playlist_from_matches(matches, ie='RtlNl')

        vimeo_urls = VimeoIE._extract_urls(url, webpage)
        if vimeo_urls:
            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())

        vid_me_embed_url = self._search_regex(
            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
            webpage, 'vid.me embed', default=None)
        if vid_me_embed_url is not None:
            return self.url_result(vid_me_embed_url, 'Vidme')

        # Look for embedded YouTube player
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/.+?)
            \1''', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for lazyYT YouTube embed
        matches = re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))

        # Look for Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        if matches:
            return _playlist_from_matches(matches, lambda m: m[-1])

        matches = DailymotionIE._extract_urls(webpage)
        if matches:
            return _playlist_from_matches(matches)

        # Look for embedded Dailymotion playlist player (#3822)
        m = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
        if m:
            playlists = re.findall(
                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
            if playlists:
                return _playlist_from_matches(
                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

        # Look for embedded Wistia player
        match = re.search(
            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
        if match:
            embed_url = self._proto_relative_url(
                unescapeHTML(match.group('url')))
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'ie_key': 'Wistia',
                'uploader': video_uploader,
            }

        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
        if match:
            return {
                '_type': 'url_transparent',
                'url': 'wistia:%s' % match.group('id'),
                'ie_key': 'Wistia',
                'uploader': video_uploader,
            }

        match = re.search(
            r'''(?sx)
                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
            ''', webpage)
        if match:
            return self.url_result(self._proto_relative_url(
                'wistia:%s' % match.group('id')), 'Wistia')

        # Look for SVT player
        svt_url = SVTIE._extract_url(webpage)
        if svt_url:
            return self.url_result(svt_url, 'SVT')

        # Look for embedded condenast player
        matches = re.findall(
            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
            webpage)
        if matches:
            return {
                '_type': 'playlist',
                'entries': [{
                    '_type': 'url',
                    'ie_key': 'CondeNast',
                    'url': ma,
                } for ma in matches],
                'title': video_title,
                'id': video_id,
            }

        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
            burl = unescapeHTML(mobj.group(1))
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

        # Look for embedded Vevo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded Viddler player
        mobj = re.search(
            r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for NYTimes player
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for Libsyn player
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))

        # Look for multiple Ooyala embeds on SBN network websites
        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
        if mobj is not None:
            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
            if embeds:
                return _playlist_from_matches(
                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')

        # Look for Aparat videos
        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Aparat')

        # Look for MPORA videos
        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

        # Look for embedded NovaMov-based player
        mobj = re.search(
            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                    (?P<url>http://(?:(?:embed|www)\.)?
                        (?:novamov\.com|
                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
                           videoweed\.(?:es|com)|
                           movshare\.(?:net|sx|ag)|
                           divxstage\.(?:eu|net|ch|co|at|ag))
                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        facebook_url = FacebookIE._extract_url(webpage)
        if facebook_url is not None:
            return self.url_result(facebook_url, 'Facebook')

        # Look for embedded VK player
        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'VK')

        # Look for embedded Odnoklassniki player
        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Odnoklassniki')

        # Look for embedded ivi player
        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ivi')

        # Look for embedded Huffington Post player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'HuffPost')

        # Look for embed.ly
        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
        if mobj is not None:
            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))

        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(
                matches, getter=unescapeHTML, ie='FunnyOrDie')

        # Look for BBC iPlayer embed
        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
        if matches:
            return _playlist_from_matches(matches, ie='BBCCoUk')

        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

        # Look for embedded TVC player
        tvc_url = TVCIE._extract_url(webpage)
        if tvc_url:
            return self.url_result(tvc_url, 'TVC')

        # Look for embedded SportBox player
        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
        if sportbox_urls:
            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')

        # Look for embedded PornHub player
        pornhub_url = PornHubIE._extract_url(webpage)
        if pornhub_url:
            return self.url_result(pornhub_url, 'PornHub')

        # Look for embedded XHamster player
        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
        if xhamster_urls:
            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')

        # Look for embedded TNAFlixNetwork player
        tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
        if tnaflix_urls:
            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())

        # Look for embedded DrTuber player
        drtuber_urls = DrTuberIE._extract_urls(webpage)
        if drtuber_urls:
            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())

        # Look for embedded Tvigle player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Tvigle')

        # Look for embedded TED player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

        # Look for embedded Ustream videos
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ustream')

        # Look for embedded arte.tv player
        mobj = re.search(
            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')

        # Look for embedded francetv player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded smotri.com player
        smotri_url = SmotriIE._extract_url(webpage)
        if smotri_url:
            return self.url_result(smotri_url, 'Smotri')

        # Look for embedded Myvi.ru player
        myvi_url = MyviIE._extract_url(webpage)
        if myvi_url:
            return self.url_result(myvi_url)

        # Look for embedded soundcloud player
        soundcloud_urls = SoundcloudIE._extract_urls(webpage)
        if soundcloud_urls:
            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())

        # Look for embedded mtvservices player
        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
        if mtvservices_url:
            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')

        # Look for embedded yahoo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')

        # Look for embedded sbs.com.au player
        mobj = re.search(
            r'''(?x)
            (?:
                <meta\s+property="og:video"\s+content=|
                <iframe[^>]+?src=
            )
            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

        # Look for embedded Cinchcast player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Cinchcast')

        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
            webpage)
        if not mobj:
            mobj = re.search(
                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
                webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'MLB')

        mobj = re.search(
            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
            webpage)
        if mobj is not None:
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Livestream')

        # Look for Zapiks embed
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Zapiks')

        # Look for Kaltura embeds
        kaltura_url = KalturaIE._extract_url(webpage)
        if kaltura_url:
            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())

        # Look for Eagle.Platform embeds
        eagleplatform_url = EaglePlatformIE._extract_url(webpage)
        if eagleplatform_url:
            return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())

        # Look for ClipYou (uses Eagle.Platform) embeds
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if mobj is not None:
            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

        # Look for Pladform embeds
        pladform_url = PladformIE._extract_url(webpage)
        if pladform_url:
            return self.url_result(pladform_url)

        # Look for Videomore embeds
        videomore_url = VideomoreIE._extract_url(webpage)
        if videomore_url:
            return self.url_result(videomore_url)

        # Look for Playwire embeds
        mobj = re.search(
            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for 5min embeds
        mobj = re.search(
            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
        if mobj is not None:
            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

        # Look for Crooks and Liars embeds
        mobj = re.search(
            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for NBC Sports VPlayer embeds
        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
        if nbc_sports_url:
            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')

        # Look for NBC News embeds
        nbc_news_embed_url = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
        if nbc_news_embed_url:
            return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')

        # Look for Google Drive embeds
        google_drive_url = GoogleDriveIE._extract_url(webpage)
        if google_drive_url:
            return self.url_result(google_drive_url, 'GoogleDrive')

        # Look for UDN embeds
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
        if mobj is not None:
            return self.url_result(
                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')

        # Look for Senate ISVP iframe
        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
        if senate_isvp_url:
            return self.url_result(senate_isvp_url, 'SenateISVP')

        # Look for Dailymotion Cloud videos
        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if dmcloud_url:
            return self.url_result(dmcloud_url, 'DailymotionCloud')

        # Look for OnionStudios embeds
        onionstudios_url = OnionStudiosIE._extract_url(webpage)
        if onionstudios_url:
            return self.url_result(onionstudios_url)

        # Look for ViewLift embeds
        viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
        if viewlift_url:
            return self.url_result(viewlift_url)

        # Look for JWPlatform embeds
        jwplatform_url = JWPlatformIE._extract_url(webpage)
        if jwplatform_url:
            return self.url_result(jwplatform_url, 'JWPlatform')

        # Look for ScreenwaveMedia embeds
        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
        if mobj is not None:
            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')

        # Look for Digiteka embeds
        digiteka_url = DigitekaIE._extract_url(webpage)
        if digiteka_url:
            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())

        # Look for Arkena embeds
        arkena_url = ArkenaIE._extract_url(webpage)
        if arkena_url:
            return self.url_result(arkena_url, ArkenaIE.ie_key())

        # Look for Limelight embeds
        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
        if mobj:
            lm = {
                'Media': 'media',
                'Channel': 'channel',
                'ChannelList': 'channel_list',
            }
            return self.url_result('limelight:%s:%s' % (
                lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))

        # Look for AdobeTVVideo embeds
        mobj = re.search(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
            webpage)
        if mobj is not None:
            return self.url_result(
                self._proto_relative_url(unescapeHTML(mobj.group(1))),
                'AdobeTVVideo')

        # Look for Vine embeds
        mobj = re.search(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
            webpage)
        if mobj is not None:
            return self.url_result(
                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')

        # Look for VODPlatform embeds
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(
                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')

        # Look for Mangomolo embeds
        mobj = re.search(
            r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
                (?:
                    video\?.*?\bid=(?P<video_id>\d+)|
                    index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
                ).+?)\1''', webpage)
        if mobj is not None:
            info = {
                '_type': 'url_transparent',
                'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
                'title': video_title,
                'description': video_description,
                'thumbnail': video_thumbnail,
                'uploader': video_uploader,
            }
            video_id = mobj.group('video_id')
            if video_id:
                info.update({
                    'ie_key': 'MangomoloVideo',
                    'id': video_id,
                })
            else:
                info.update({
                    'ie_key': 'MangomoloLive',
                    'id': mobj.group('channel_id'),
                })
            return info

        # Look for Instagram embeds
        instagram_embed_url = InstagramIE._extract_embed_url(webpage)
        if instagram_embed_url is not None:
            return self.url_result(
                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())

        # Look for LiveLeak embeds
        liveleak_url = LiveLeakIE._extract_url(webpage)
        if liveleak_url:
            return self.url_result(liveleak_url, 'LiveLeak')

        # Look for 3Q SDN embeds
        threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
        if threeqsdn_url:
            return {
                '_type': 'url_transparent',
                'ie_key': ThreeQSDNIE.ie_key(),
                'url': self._proto_relative_url(threeqsdn_url),
                'title': video_title,
                'description': video_description,
                'thumbnail': video_thumbnail,
                'uploader': video_uploader,
            }

        # Look for VBOX7 embeds
        vbox7_url = Vbox7IE._extract_url(webpage)
        if vbox7_url:
            return self.url_result(vbox7_url, Vbox7IE.ie_key())

        # Look for DBTV embeds
        dbtv_urls = DBTVIE._extract_urls(webpage)
        if dbtv_urls:
            return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())

        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(
            webpage, video_id, default={}, expected_type='VideoObject')
        if json_ld.get('url'):
            info_dict.update({
                'title': video_title or info_dict['title'],
                'description': video_description,
                'thumbnail': video_thumbnail,
                'age_limit': age_limit
            })
            info_dict.update(json_ld)
            return info_dict

        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
        if entries:
            for entry in entries:
                entry.update({
                    'id': video_id,
                    'title': video_title,
                })
                self._sort_formats(entry['formats'])
            return self.playlist_result(entries)

        def check_video(vurl):
            """Tell whether vurl is worth treating as a candidate video URL.

            Anything YoutubeIE considers suitable is accepted outright.
            Otherwise the URL path must contain a dot and its extension must
            not be one of the excluded types listed below (Flash, images,
            subtitles, scripts).
            """
            excluded_exts = (
                'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')

            if YoutubeIE.suitable(vurl):
                return True

            url_path = compat_urlparse.urlparse(vurl).path
            extension = determine_ext(url_path)
            if '.' not in url_path:
                return False
            return extension not in excluded_exts

        def filter_video(urls):
            """Return a list of the entries of urls that check_video accepts, in order."""
            return [candidate for candidate in urls if check_video(candidate)]

        # Start with something easy: JW Player in SWFObject
        found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
        if not found:
            # Look for gorilla-vid style embedding
            found = filter_video(re.findall(r'''(?sx)
                (?:
                    jw_plugins|
                    JWPlayerOptions|
                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
                )
                .*?
                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
        if not found:
            # Broaden the findall a little bit: JWPlayer JS loader
            found = filter_video(re.findall(
                r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
        if not found:
            # Flow player
            found = filter_video(re.findall(r'''(?xs)
                flowplayer\("[^"]+",\s*
                    \{[^}]+?\}\s*,
                    \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                        ["']?url["']?\s*:\s*["']([^"']+)["']
            ''', webpage))
        if not found:
            # Cinerama player
            found = re.findall(
                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
        if not found:
            # Try to find twitter cards info
            # twitter:player:stream should be checked before twitter:player since
            # it is expected to contain a raw stream (see
            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
            found = filter_video(re.findall(
                r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
        if not found:
            # We look for Open Graph info:
            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
        if not found:
            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
                webpage)
            if not found:
                # Look also in Refresh HTTP header
                refresh_header = head_response.headers.get('Refresh')
                if refresh_header:
                    # In python 2 response HTTP headers are bytestrings
                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
                        refresh_header = refresh_header.decode('iso-8859-1')
                    found = re.search(REDIRECT_REGEX, refresh_header)
            if found:
                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
                self.report_following_redirect(new_url)
                return {
                    '_type': 'url',
                    'url': new_url,
                }

        if not found:
            # twitter:player is a https URL to iframe player that may or may not
            # be supported by youtube-dl thus this is checked the very last (see
            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
            embed_url = self._html_search_meta('twitter:player', webpage, default=None)
            if embed_url:
                return self.url_result(embed_url)

        if not found:
            raise UnsupportedError(url)

        entries = []
        for video_url in orderedSet(found):
            video_url = unescapeHTML(video_url)
            video_url = video_url.replace('\\/', '/')
            video_url = compat_urlparse.urljoin(url, video_url)
            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))

            # Sometimes, jwplayer extraction will result in a YouTube URL
            if YoutubeIE.suitable(video_url):
                entries.append(self.url_result(video_url, 'Youtube'))
                continue

            # here's a fun little line of code for you:
            video_id = os.path.splitext(video_id)[0]

            entry_info_dict = {
                'id': video_id,
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            }

            ext = determine_ext(video_url)
            if ext == 'smil':
                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
            elif ext == 'xspf':
                return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
            elif ext == 'm3u8':
                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
            elif ext == 'mpd':
                entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
            elif ext == 'f4m':
                entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
            elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
                # Just matching .ism/manifest is not enough to be reliably sure
                # whether it's actually an ISM manifest or some other streaming
                # manifest since there are various streaming URL formats
                # possible (see [1]) as well as some other shenanigans like
                # .smil/manifest URLs that actually serve an ISM (see [2]) and
                # so on.
                # Thus the most reasonable way to solve this is to delegate
                # to generic extractor in order to look into the contents of
                # the manifest itself.
                # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
                # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
                entry_info_dict = self.url_result(
                    smuggle_url(video_url, {'to_generic': True}),
                    GenericIE.ie_key())
            else:
                entry_info_dict['url'] = video_url

            if entry_info_dict.get('formats'):
                self._sort_formats(entry_info_dict['formats'])

            entries.append(entry_info_dict)

        if len(entries) == 1:
            return entries[0]
        else:
            for num, e in enumerate(entries, start=1):
                # 'url' results don't have a title
                if e.get('title') is not None:
                    e['title'] = '%s (%d)' % (e['title'], num)
            return {
                '_type': 'playlist',
                'entries': entries,
            }
-												Unify coding cookie

											
										
										
											2016-10-02 11:39:18 +00:00
+								# coding: utf-8
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								from __future__ import unicode_literals
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								import os
 								import re
-												[extractor/generic] Fix following redirect in Refresh HTTP header on python 2

											
										
										
											2015-09-20 05:16:12 +00:00
+								import sys
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
 								from .common import InfoExtractor
-												[youtube] Support jwplayer with YouTube URLs (Closes #2075)

											
										
										
											2014-01-06 00:42:58 +00:00
+								from .youtube import YoutubeIE
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											2014-11-02 10:23:40 +00:00
+								from ..compat import (
-												[compat] compat_etree_fromstring: also decode the text attribute

Deletes parse_xml from utils, because it also does it.

											
										
										
											2015-10-26 15:41:24 +00:00
+								    compat_etree_fromstring,
-												[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL

											
										
										
											2015-05-30 19:23:58 +00:00
+								    compat_urllib_parse_unquote,
-												[generic] simply use urljoin

											
										
										
											2013-08-28 10:47:27 +00:00
+								    compat_urlparse,
-												[generic] Fix on python 2.6

`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.

											
										
										
											2014-02-21 15:59:10 +00:00
+								    compat_xml_parse_error,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											2014-11-02 10:23:40 +00:00
+								)
 								from ..utils import (
-												fix up imports

											
										
										
											2014-08-27 23:00:59 +00:00
+								    determine_ext,
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								    ExtractorError,
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											2014-08-24 00:02:17 +00:00
+								    float_or_none,
-												[aparat] Add support (Fixes #2012)

											
										
										
											2013-12-20 16:05:28 +00:00
+								    HEADRequest,
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											2015-01-23 00:21:30 +00:00
+								    is_html,
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								    orderedSet,
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 16:18:17 +00:00
+								    sanitized_Request,
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											2013-10-15 10:05:13 +00:00
+								    smuggle_url,
 								    unescapeHTML,
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
+								    unified_strdate,
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											2014-08-24 02:47:18 +00:00
+								    unsmuggle_url,
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											2014-12-30 18:35:35 +00:00
+								    UnsupportedError,
-												[generic] Parse RSS enclosure URLs (Fixes #5091)

											
										
										
											2015-03-02 14:21:11 +00:00
+								    xpath_text,
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								)
-												[brightcove] add support for brightcove in page embed(fixes #6824)

											
										
										
											2015-09-11 03:46:21 +00:00
+								from .brightcove import (
-												[brightcove] Rename extractor to brightcove legacy

Old embedding approaches are now "Legacy Studio"

											
										
										
											2015-11-13 23:54:16 +00:00
+								    BrightcoveLegacyIE,
-												[brightcove:embedinpage] Rename extractor to brightcove new

It's not actually embed_in_page but "New Studio" and allows both iframe and embed_in_page embeds

											
										
										
											2015-11-13 23:55:59 +00:00
+								    BrightcoveNewIE,
-												[brightcove] add support for brightcove in page embed(fixes #6824)

											
										
										
											2015-09-11 03:46:21 +00:00
+								)
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											2015-03-30 19:36:09 +00:00
+								from .nbc import NBCSportsVPlayerIE
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								from .ooyala import OoyalaIE
-												[generic] Add support for embedded rutv player

											
										
										
											2014-03-16 19:00:31 +00:00
+								from .rutv import RUTVIE
-												[tvc] Refactor extractor names

											
										
										
											2015-06-12 10:24:13 +00:00
+								from .tvc import TVCIE
-												[generic] Add support for sportbox embeds

											
										
										
											2015-05-15 17:09:34 +00:00
+								from .sportbox import SportBoxEmbedIE
-												[smotri] Modernize and add support for embedded videos (Closes #2585)

											
										
										
											2014-03-28 12:58:49 +00:00
+								from .smotri import SmotriIE
-												[myvi:embed] Rename to myvi

											
										
										
											2015-07-09 18:27:44 +00:00
+								from .myvi import MyviIE
-												[condenast] Add support for embedded videos (Closes #3929)

											
										
										
											2014-10-13 12:59:35 +00:00
+								from .condenast import CondeNastIE
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								from .udn import UDNEmbedIE
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											2015-04-20 19:18:38 +00:00
+								from .senateisvp import SenateISVPIE
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											2015-05-08 18:23:35 +00:00
+								from .svt import SVTIE
-												[extractor/generic] Add support for pornhub embeds

											
										
										
											2015-06-12 21:36:16 +00:00
+								from .pornhub import PornHubIE
-												[generic] Add support for xhamster embeds

											
										
										
											2015-06-21 17:11:25 +00:00
+								from .xhamster import XHamsterEmbedIE
-												[extractor/generic] Add support for tnaflix network embeds (Closes #7505)

											
										
										
											2016-02-27 11:15:49 +00:00
+								from .tnaflix import TNAFlixNetworkEmbedIE
-												[extractor/generic] Add support for drtuber embeds (closes #11098)

											
										
										
											2016-11-06 14:33:51 +00:00
+								from .drtuber import DrTuberIE
-												[vimeo/generic] Move detection logic from GenericIE to VimeoIE

											
										
										
											2015-06-21 10:23:58 +00:00
+								from .vimeo import VimeoIE
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 15:37:54 +00:00
+								from .dailymotion import (
 								    DailymotionIE,
 								    DailymotionCloudIE,
 								)
-												[extractor/generic] Add support for OnionStudios embeds (Closes #5841)

											
										
										
											2015-06-24 17:19:50 +00:00
+								from .onionstudios import OnionStudiosIE
-												[viewlift] replace SnagFilms extractors

- add support for other sites that use the same logic
- improve format extraction and sorting

											
										
										
											2016-04-29 10:14:42 +00:00
+								from .viewlift import ViewLiftEmbedIE
-												[generic] Use screenwavemedia embed pattern

											
										
										
											2015-08-29 13:07:31 +00:00
+								from .screenwavemedia import ScreenwaveMediaIE
-												[extractor/generic] Use _extract_url for mtvservices

											
										
										
											2015-09-26 13:47:20 +00:00
+								from .mtv import MTVServicesEmbeddedIE
-												[extractor/generic] Use _extract_url for pladform

											
										
										
											2015-12-07 16:03:21 +00:00
+								from .pladform import PladformIE
-												[extractor/generic] Add support for videomore embeds

											
										
										
											2015-12-29 17:58:23 +00:00
+								from .videomore import VideomoreIE
-												[googledrive] Modernize

											
										
										
											2015-12-21 02:05:34 +00:00
+								from .googledrive import GoogleDriveIE
-												[makertv] improve extraction

											
										
										
											2015-12-21 03:24:58 +00:00
+								from .jwplatform import JWPlatformIE
-												[ultimedia] Rename to digiteka

											
										
										
											2016-01-19 15:51:46 +00:00
+								from .digiteka import DigitekaIE
-												[extractor/generic] Add support for arkena embeds

											
										
										
											2016-07-23 10:56:48 +00:00
+								from .arkena import ArkenaIE
-												[generic] Extract Instagram embeds (#8817)

											
										
										
											2016-03-24 08:32:27 +00:00
+								from .instagram import InstagramIE
-												[generic] Add support for LiveLeak embeds

											
										
										
											2016-03-31 18:42:55 +00:00
+								from .liveleak import LiveLeakIE
-												[extractor/generic:3qsdn] Add support for embeds

											
										
										
											2016-05-14 17:40:34 +00:00
+								from .threeqsdn import ThreeQSDNIE
-												[extractor/generic] Add support for theplatform embeds (Closes #8636, closes #9476)

											
										
										
											2016-05-22 00:52:39 +00:00
+								from .theplatform import ThePlatformIE
-												[extractor/generic] Add support vessel embeds (Closes #7083)

											
										
										
											2016-06-08 21:02:27 +00:00
+								from .vessel import VesselIE
-												[extractor/generic] Use _extract_url for kaltura embeds (Closes #9922)

											
										
										
											2016-06-27 15:45:26 +00:00
+								from .kaltura import KalturaIE
-												[eagleplatform] Improve embed detection and extract in separate routine (Closes #9926)

											
										
										
											2016-06-29 16:01:34 +00:00
+								from .eagleplatform import EaglePlatformIE
-												[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

											
										
										
											2016-07-02 13:33:23 +00:00
+								from .facebook import FacebookIE
-												[extractor/generic] Extract all soundcloud embeds (Closes #10179)

											
										
										
											2016-07-28 15:15:15 +00:00
+								from .soundcloud import SoundcloudIE
-												[vbox7:generic] Add support for vbox7 embeds

											
										
										
											2016-08-16 18:02:59 +00:00
+								from .vbox7 import Vbox7IE
-												[DBTV:generic] Add support for embeds

											
										
										
											2016-08-17 10:45:24 +00:00
+								from .dbtv import DBTVIE
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
-												[generic] Support double slash URLs (Fixes #1309)

											
										
										
											2013-08-24 20:49:52 +00:00
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								class GenericIE(InfoExtractor):
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								    IE_DESC = 'Generic downloader that works on some sites'
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								    _VALID_URL = r'.*'
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								    IE_NAME = 'generic'
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
+								    _TESTS = [
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											2015-05-30 20:22:29 +00:00
+								        # Direct link to a video
 								        {
 								            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 								            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 								            'info_dict': {
 								                'id': 'trailer',
 								                'ext': 'mp4',
 								                'title': 'trailer',
 								                'upload_date': '20100513',
 								            }
 								        },
-												[extractor/generic] Clarify test comment

											
										
										
											2015-05-30 20:36:20 +00:00
+								        # Direct link to media delivered compressed (until Accept-Encoding is *)
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											2015-05-30 20:22:29 +00:00
+								        {
 								            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 								            'md5': '128c42e68b13950268b648275386fc74',
 								            'info_dict': {
 								                'id': 'FictionJunction-Parallel_Hearts',
 								                'ext': 'flac',
 								                'title': 'FictionJunction-Parallel_Hearts',
 								                'upload_date': '20140522',
 								            },
 								            'expected_warnings': [
 								                'URL could be a direct video link, returning it as such.'
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            ],
 								            'skip': 'URL invalid',
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											2015-05-30 20:22:29 +00:00
+								        },
 								        # Direct download with broken HEAD
 								        {
 								            'url': 'http://ai-radio.org:8000/radio.opus',
 								            'info_dict': {
 								                'id': 'radio',
 								                'ext': 'opus',
 								                'title': 'radio',
 								            },
 								            'params': {
 								                'skip_download': True,  # infinite live stream
 								            },
 								            'expected_warnings': [
-												[generic] Fix test_Generic_2

Now a HEAD request returns 400 Bad Request

											
										
										
											2016-04-16 11:44:12 +00:00
+								                r'501.*Not Implemented',
 								                r'400.*Bad Request',
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											2015-05-30 20:22:29 +00:00
+								            ],
 								        },
 								        # Direct link with incorrect MIME type
 								        {
 								            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 								            'md5': '4ccbebe5f36706d85221f204d7eb5913',
 								            'info_dict': {
 								                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 								                'id': '5_Lennart_Poettering_-_Systemd',
 								                'ext': 'webm',
 								                'title': '5_Lennart_Poettering_-_Systemd',
 								                'upload_date': '20141120',
 								            },
 								            'expected_warnings': [
 								                'URL could be a direct video link, returning it as such.'
 								            ]
 								        },
 								        # RSS feed
 								        {
 								            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 								            'info_dict': {
 								                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 								                'title': 'Zero Punctuation',
 								                'description': 're:.*groundbreaking video review series.*'
 								            },
 								            'playlist_mincount': 11,
 								        },
 								        # RSS feed with enclosure
 								        {
 								            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 								            'info_dict': {
 								                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 								                'ext': 'm4v',
 								                'upload_date': '20150228',
 								                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 								            }
 								        },
-												[extractor/generic] Add generic SMIL tests

											
										
										
											2015-08-01 19:16:21 +00:00
+								        # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 								        {
 								            'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 								            'info_dict': {
 								                'id': 'smil',
 								                'ext': 'mp4',
 								                'title': 'Automatics, robotics and biocybernetics',
 								                'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
-												[generic] Update test

											
										
										
											2015-10-31 17:05:30 +00:00
+								                'upload_date': '20130627',
-												[extractor/generic] Add generic SMIL tests

											
										
										
											2015-08-01 19:16:21 +00:00
+								                'formats': 'mincount:16',
 								                'subtitles': 'mincount:1',
 								            },
 								            'params': {
 								                'force_generic_extractor': True,
 								                'skip_download': True,
 								            },
 								        },
 								        # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 								        {
 								            'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 								            'info_dict': {
 								                'id': 'hds',
 								                'ext': 'flv',
 								                'title': 'hds',
 								                'formats': 'mincount:1',
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
 								        # SMIL from https://www.restudy.dk/video/play/id/1637
 								        {
 								            'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 								            'info_dict': {
 								                'id': 'video_1637',
 								                'ext': 'flv',
 								                'title': 'video_1637',
 								                'formats': 'mincount:3',
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
 								        # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 								        {
 								            'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 								            'info_dict': {
 								                'id': 'smil-service',
 								                'ext': 'flv',
 								                'title': 'smil-service',
 								                'formats': 'mincount:1',
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
 								        # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 								        {
 								            'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 								            'info_dict': {
 								                'id': '4719370',
 								                'ext': 'mp4',
 								                'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 								                'formats': 'mincount:3',
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
-												[extractor/generic] Add test for xspf playlist

											
										
										
											2015-08-09 13:47:08 +00:00
+								        # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 								        {
 								            'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 								            'info_dict': {
 								                'id': 'mZlp2ctYIUEB',
 								                'ext': 'mp4',
 								                'title': 'Tikibad ontruimd wegens brand',
 								                'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 33,
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
-												[extractor/generic] Add direct mpd url test

											
										
										
											2016-02-12 18:36:47 +00:00
+								        # MPD from http://dash-mse-test.appspot.com/media.html
 								        {
 								            'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 								            'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 								            'info_dict': {
 								                'id': 'car-20120827-manifest',
 								                'ext': 'mp4',
 								                'title': 'car-20120827-manifest',
 								                'formats': 'mincount:9',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'upload_date': '20130904',
-												[extractor/generic] Add direct mpd url test

											
										
										
											2016-02-12 18:36:47 +00:00
+								            },
 								            'params': {
 								                'format': 'bestvideo',
 								            },
 								        },
-												[extractor/generic] Add another test for generic m3u8

											
										
										
											2016-03-18 15:54:33 +00:00
+								        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 								        {
 								            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 								            'info_dict': {
 								                'id': 'content',
 								                'ext': 'mp4',
 								                'title': 'content',
 								                'formats': 'mincount:8',
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            },
 								            'skip': 'video gone',
-												[extractor/generic] Add another test for generic m3u8

											
										
										
											2016-03-18 15:54:33 +00:00
+								        },
-												[extractor/generic] Add a test for m3u playlist served without proper Content-Type

											
										
										
											2016-03-18 16:49:11 +00:00
+								        # m3u8 served with Content-Type: text/plain
 								        {
 								            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 								            'info_dict': {
 								                'id': 'index',
 								                'ext': 'mp4',
 								                'title': 'index',
 								                'upload_date': '20140720',
 								                'formats': 'mincount:11',
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            },
 								            'skip': 'video gone',
-												[extractor/generic] Add a test for m3u playlist served without proper Content-Type

											
										
										
											2016-03-18 16:49:11 +00:00
+								        },
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											2015-05-30 20:22:29 +00:00
+								        # google redirect
 								        {
 								            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 								            'info_dict': {
 								                'id': 'cmQHVoWB5FY',
 								                'ext': 'mp4',
 								                'upload_date': '20130224',
 								                'uploader_id': 'TheVerge',
 								                'description': 're:^Chris Ziegler takes a look at the\.*',
 								                'uploader': 'The Verge',
 								                'title': 'First Firefox OS phones side-by-side',
 								            },
 								            'params': {
 								                'skip_download': False,
 								            }
 								        },
-												[extractor/generic] Fix following redirect in Refresh HTTP header on python 2

											
										
										
											2015-09-20 05:16:12 +00:00
+								        {
 								            # redirect in Refresh HTTP header
 								            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 								            'info_dict': {
 								                'id': 'pO8h3EaFRdo',
 								                'ext': 'mp4',
 								                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 								                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 								                'upload_date': '20150917',
 								                'uploader_id': 'brtvofficial',
 								                'uploader': 'Boiler Room',
 								            },
 								            'params': {
 								                'skip_download': False,
 								            },
 								        },
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
+								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-												[generic] Update test

											
										
										
											2014-04-14 11:51:46 +00:00
+								            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'info_dict': {
-												[generic] Update test

											
										
										
											2014-04-14 11:51:46 +00:00
+								                'id': '13601338388002',
 								                'ext': 'mp4',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								                'uploader': 'www.hodiho.fr',
 								                'title': 'R\u00e9gis plante sa Jeep',
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
+								            }
 								        },
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											2013-10-27 13:40:25 +00:00
+								        # bandcamp page with custom domain
 								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'add_ie': ['Bandcamp'],
 								            'url': 'http://bronyrock.com/track/the-pony-mash',
 								            'info_dict': {
-												[generic] Modernize tests

											
										
										
											2014-04-14 11:56:29 +00:00
+								                'id': '3235767654',
 								                'ext': 'mp3',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								                'title': 'The Pony Mash',
 								                'uploader': 'M_Pallante',
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											2013-10-27 13:40:25 +00:00
+								            },
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'skip': 'There is a limit of 200 free downloads / month for the test song',
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											2013-10-27 13:40:25 +00:00
+								        },
-												[brightcove] Add the extraction of the url from generic

											
										
										
											2013-11-06 15:40:24 +00:00
+								        # embedded brightcove video
-												[brightcove] Set the 'Referer' header if the url has the 'linkBaseUrl' parameter (fixes #1553)

											
										
										
											2013-11-07 20:06:48 +00:00
+								        # it also tests brightcove videos that require the 'Referer' header to be
 								        # set in the HTTP requests
-												[brightcove] Add the extraction of the url from generic

											
										
										
											2013-11-06 15:40:24 +00:00
+								        {
-												Rename all references to legacy studio Brightcove extractor

											
										
										
											2015-11-14 00:05:46 +00:00
+								            'add_ie': ['BrightcoveLegacy'],
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 								            'info_dict': {
 								                'id': '2765128793001',
 								                'ext': 'mp4',
 								                'title': 'Le cours de bourse : l’analyse technique',
 								                'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 								                'uploader': 'BFM BUSINESS',
-												[brightcove] Add the extraction of the url from generic

											
										
										
											2013-11-06 15:40:24 +00:00
+								            },
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'params': {
 								                'skip_download': True,
-												[brightcove] Add the extraction of the url from generic

											
										
										
											2013-11-06 15:40:24 +00:00
+								            },
 								        },
-												[brightcove] Move test to generic

											
										
										
											2014-01-28 02:35:32 +00:00
+								        {
 								            # https://github.com/rg3/youtube-dl/issues/2253
 								            'url': 'http://bcove.me/i6nfkrc3',
 								            'md5': '0ba9446db037002366bab3b3eb30c88c',
 								            'info_dict': {
-												[generic] Modernize tests

											
										
										
											2014-04-14 11:56:29 +00:00
+								                'id': '3101154703001',
 								                'ext': 'mp4',
-												[brightcove] Move test to generic

											
										
										
											2014-01-28 02:35:32 +00:00
+								                'title': 'Still no power',
 								                'uploader': 'thestar.com',
 								                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 								            },
-												Rename all references to legacy studio Brightcove extractor

											
										
										
											2015-11-14 00:05:46 +00:00
+								            'add_ie': ['BrightcoveLegacy'],
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            'skip': 'video gone',
-												[brightcove] Move test to generic

											
										
										
											2014-01-28 02:35:32 +00:00
+								        },
-												[brightcove] Encode object_str with utf-8

											
										
										
											2014-04-01 13:17:35 +00:00
+								        {
 								            'url': 'http://www.championat.com/video/football/v/87/87499.html',
 								            'md5': 'fb973ecf6e4a78a67453647444222983',
 								            'info_dict': {
 								                'id': '3414141473001',
 								                'ext': 'mp4',
 								                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 								                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 								                'uploader': 'Championat',
 								            },
 								        },
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											2014-11-03 21:13:46 +00:00
+								        {
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											2014-11-05 22:14:33 +00:00
+								            # https://github.com/rg3/youtube-dl/issues/3541
-												Rename all references to legacy studio Brightcove extractor

											
										
										
											2015-11-14 00:05:46 +00:00
+								            'add_ie': ['BrightcoveLegacy'],
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											2014-11-03 21:13:46 +00:00
+								            'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 								            'info_dict': {
 								                'id': '3866516442001',
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											2014-11-05 22:14:33 +00:00
+								                'ext': 'mp4',
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											2014-11-03 21:13:46 +00:00
+								                'title': 'Leer mij vrouwen kennen: Aflevering 1',
 								                'description': 'Leer mij vrouwen kennen: Aflevering 1',
 								                'uploader': 'SBS Broadcasting',
 								            },
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											2014-11-05 22:14:33 +00:00
+								            'skip': 'Restricted to Netherlands',
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											2014-11-03 21:13:46 +00:00
+								            'params': {
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											2014-11-05 22:14:33 +00:00
+								                'skip_download': True,  # m3u8 download
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											2014-11-03 21:13:46 +00:00
+								            },
 								        },
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								        # ooyala video
 								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-												[generic] Update some tests

											
										
										
											2015-01-05 12:07:24 +00:00
+								            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            'info_dict': {
 								                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 								                'ext': 'mp4',
-												[generic] Improve testcase

											
										
										
											2014-01-21 00:40:34 +00:00
+								                'title': '2cc213299525360.mov',  # that's what we get
-												[ooyala] fix duration scale

											
										
										
											2015-12-04 15:18:02 +00:00
+								                'duration': 238.231,
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								            },
-												[generic] Update some tests

											
										
										
											2015-01-05 12:07:24 +00:00
+								            'add_ie': ['Ooyala'],
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								        },
-												[extractor/generic] Add test for #6485

											
										
										
											2015-08-07 20:00:49 +00:00
+								        {
 								            # ooyala video embedded with http://player.ooyala.com/iframe.js
 								            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 								            'info_dict': {
 								                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 								                'ext': 'mp4',
 								                'title': '"Steve Jobs: Man in the Machine" trailer',
 								                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
-												[ooyala] fix duration scale

											
										
										
											2015-12-04 15:18:02 +00:00
+								                'duration': 135.427,
-												[extractor/generic] Add test for #6485

											
										
										
											2015-08-07 20:00:49 +00:00
+								            },
 								            'params': {
 								                'skip_download': True,
 								            },
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            'skip': 'movie expired',
-												[extractor/generic] Add test for #6485

											
										
										
											2015-08-07 20:00:49 +00:00
+								        },
-												Add support for embed.ly

											
										
										
											2014-02-24 00:15:51 +00:00
+								        # embed.ly video
 								        {
 								            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 								            'info_dict': {
 								                'id': '9ODmcdjQcHQ',
 								                'ext': 'mp4',
-												[generic] Add all test attributes for embedly (#2447)

In the future, we may want to not only print something, but throw an error for untested properties.

											
										
										
											2014-03-05 13:05:44 +00:00
+								                'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 								                'upload_date': '20140225',
 								                'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 								                'uploader': 'Tested',
 								                'uploader_id': 'testedcom',
-												Add support for embed.ly

											
										
										
											2014-02-24 00:15:51 +00:00
+								            },
 								            # No need to test YoutubeIE here
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											2014-03-11 15:51:36 +00:00
+								        # funnyordie embed
 								        {
 								            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 								            'info_dict': {
 								                'id': '18e820ec3f',
 								                'ext': 'mp4',
 								                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 								                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
-												[generic] Add support for embedded rutv player

											
										
										
											2014-03-16 19:00:31 +00:00
+								            },
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            # HEAD requests lead to endless 301, while GET is OK
 								            'expected_warnings': ['301'],
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											2014-03-11 15:51:36 +00:00
+								        },
-												[generic] Add support for embedded rutv player

											
										
										
											2014-03-16 19:00:31 +00:00
+								        # RUTV embed
 								        {
 								            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 								            'info_dict': {
 								                'id': '776940',
 								                'ext': 'mp4',
 								                'title': 'Охотское море стало целиком российским',
 								                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 								            },
 								            'params': {
 								                # m3u8 download
 								                'skip_download': True,
 								            },
-												[ted] Simplify embed code (#2587)

											
										
										
											2014-03-20 15:33:23 +00:00
+								        },
-												[extractor/generic] Add test for tvc embed

											
										
										
											2015-06-12 10:28:45 +00:00
+								        # TVC embed
 								        {
 								            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 								            'info_dict': {
 								                'id': '55304',
 								                'ext': 'mp4',
 								                'title': 'Дошкольное воспитание',
 								            },
 								        },
-												[generic] Add test for sportbox embeds

											
										
										
											2015-05-15 17:18:21 +00:00
+								        # SportBox embed
 								        {
 								            'url': 'http://www.vestifinance.ru/articles/25753',
 								            'info_dict': {
 								                'id': '25753',
-												[generic] Fix an MTV test and another test that breaks nosetests

											
										
										
											2016-07-26 03:11:36 +00:00
+								                'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
-												[generic] Add test for sportbox embeds

											
										
										
											2015-05-15 17:18:21 +00:00
+								            },
 								            'playlist': [{
 								                'info_dict': {
 								                    'id': '370908',
 								                    'title': 'Госзаказ. День 3',
 								                    'ext': 'mp4',
 								                }
 								            }, {
 								                'info_dict': {
 								                    'id': '370905',
 								                    'title': 'Госзаказ. День 2',
 								                    'ext': 'mp4',
 								                }
 								            }, {
 								                'info_dict': {
 								                    'id': '370902',
 								                    'title': 'Госзаказ. День 1',
 								                    'ext': 'mp4',
 								                }
 								            }],
 								            'params': {
 								                # m3u8 download
 								                'skip_download': True,
 								            },
 								        },
-												[extractor/generic] Add test for myvi embed

											
										
										
											2015-07-09 19:15:55 +00:00
+								        # Myvi.ru embed
 								        {
 								            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 								            'info_dict': {
 								                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 								                'ext': 'mp4',
 								                'title': 'Ужастики, русский трейлер (2015)',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 153,
 								            }
 								        },
-												[extractor/generic] Add test for xhamster embed

											
										
										
											2015-06-21 17:18:28 +00:00
+								        # XHamster embed
 								        {
 								            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 								            'info_dict': {
 								                'id': 'showthread',
 								                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 								            },
 								            'playlist_mincount': 7,
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            # This forum no longer allows <iframe> syntax;
 								            # HTML tags are now displayed as-is
 								            'skip': 'No videos on this page',
-												[extractor/generic] Add test for xhamster embed

											
										
										
											2015-06-21 17:18:28 +00:00
+								        },
-												[ted] Simplify embed code (#2587)

											
										
										
											2014-03-20 15:33:23 +00:00
+								        # Embedded TED video
 								        {
 								            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								            'md5': '65fdff94098e4a607385a60c5177c638',
-												[ted] Simplify embed code (#2587)

											
										
										
											2014-03-20 15:33:23 +00:00
+								            'info_dict': {
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								                'id': '1969',
-												[ted] Simplify embed code (#2587)

											
										
										
											2014-03-20 15:33:23 +00:00
+								                'ext': 'mp4',
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								                'title': 'Hidden miracles of the natural world',
 								                'uploader': 'Louie Schwartzberg',
 								                'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
-												[ted] Simplify embed code (#2587)

											
										
										
											2014-03-20 15:33:23 +00:00
+								            }
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											2014-03-11 15:51:36 +00:00
+								        },
-												Fix typos

Closes #8200.

											
										
										
											2016-01-10 15:17:47 +00:00
+								        # Embedded Ustream video
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											2014-04-04 14:23:09 +00:00
+								        {
 								            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 								            'md5': '27b99cdb639c9b12a79bca876a073417',
 								            'info_dict': {
-												Fix _TEST for Ustream embed URLs

											
										
										
											2014-04-04 16:56:29 +00:00
+								                'id': '45734260',
 								                'ext': 'flv',
 								                'uploader': 'AU SPA:  The NSA and Privacy',
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											2014-04-04 14:23:09 +00:00
+								                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 								            }
 								        },
-												[generic] Add nowvideo test hidden behind percent encoding

											
										
										
											2014-03-14 21:39:53 +00:00
+								        # nowvideo embed hidden behind percent encoding
 								        {
 								            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 								            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 								            'info_dict': {
 								                'id': '06e53103ca9aa',
 								                'ext': 'flv',
 								                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 								                'description': 'No description',
 								            },
-												Merge remote-tracking branch 'dstftw/generic-webpage-unescape'

Conflicts:
	youtube_dl/extractor/generic.py

											
										
										
											2014-03-21 21:14:24 +00:00
+								        },
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											2014-03-24 21:01:47 +00:00
+								        # arte embed
 								        {
 								            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 								            'md5': '7653032cbb25bf6c80d80f217055fa43',
 								            'info_dict': {
 								                'id': '048195-004_PLUS7-F',
 								                'ext': 'flv',
 								                'title': 'X:enius',
 								                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 								                'upload_date': '20140320',
 								            },
 								            'params': {
 								                'skip_download': 'Requires rtmpdump'
-												[generic] Update some _TESTS

											
										
										
											2016-08-28 07:46:11 +00:00
+								            },
 								            'skip': 'video gone',
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											2014-03-24 21:01:47 +00:00
+								        },
-												[extractor/generic] Add support for francetv embeds

											
										
										
											2015-07-18 16:56:00 +00:00
+								        # francetv embed
 								        {
 								            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 								            'info_dict': {
 								                'id': 'EV_30231',
 								                'ext': 'mp4',
 								                'title': 'Alcaline, le concert avec Calogero',
 								                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 								                'upload_date': '20150226',
 								                'timestamp': 1424989860,
 								                'duration': 5400,
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            },
 								            'expected_warnings': [
 								                'Forbidden'
 								            ]
 								        },
-												[condenast|generic] Add support for condenast embeds (Fixes #2783)

											
										
										
											2014-04-21 03:47:52 +00:00
+								        # Condé Nast embed
 								        {
 								            'url': 'http://www.wired.com/2014/04/honda-asimo/',
 								            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 								            'info_dict': {
 								                'id': '53501be369702d3275860000',
 								                'ext': 'mp4',
 								                'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 								            }
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											2014-04-29 23:46:06 +00:00
+								        },
 								        # Dailymotion embed
 								        {
 								            'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 								            'md5': '441aeeb82eb72c422c7f14ec533999cd',
 								            'info_dict': {
 								                'id': 'k2mm4bCdJ6CQ2i7c8o2',
 								                'ext': 'mp4',
 								                'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											2014-04-29 23:46:06 +00:00
+								                'uploader': 'Spi0n',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'uploader_id': 'xgditw',
 								                'upload_date': '20140425',
 								                'timestamp': 1398441542,
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											2014-04-29 23:46:06 +00:00
+								            },
 								            'add_ie': ['Dailymotion'],
-												[generic] Add support for <embed YouTube

											
										
										
											2014-06-09 20:06:45 +00:00
+								        },
 								        # YouTube embed
 								        {
 								            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 								            'info_dict': {
 								                'id': 'FXRb4ykk4S0',
 								                'ext': 'mp4',
 								                'title': 'The NBL Auction 2014',
 								                'uploader': 'BADMINTON England',
 								                'uploader_id': 'BADMINTONEvents',
 								                'upload_date': '20140603',
 								                'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 								            },
 								            'add_ie': ['Youtube'],
 								            'params': {
 								                'skip_download': True,
 								            }
 								        },
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
+								        # MTVServices embed
 								        {
-												[generic] Update test_Generic_40

The original link now redirects to an YouTube user channel.

											
										
										
											2016-06-10 08:39:31 +00:00
+								            'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 								            'md5': 'ca1aef97695ef2c1d6973256a57e5252',
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
+								            'info_dict': {
-												[generic] Update test_Generic_40

The original link now redirects to an YouTube user channel.

											
										
										
											2016-06-10 08:39:31 +00:00
+								                'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
+								                'ext': 'mp4',
-												[generic] Update test_Generic_40

The original link now redirects to an YouTube user channel.

											
										
										
											2016-06-10 08:39:31 +00:00
+								                'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 								                'description': 'Two valets share their love for movie star Liam Neesons.',
-												[generic] Fix an MTV test and another test that breaks nosetests

											
										
										
											2016-07-26 03:11:36 +00:00
+								                'timestamp': 1349922600,
 								                'upload_date': '20121011',
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
+								            },
 								        },
-												Add a _TEST_

											
										
										
											2014-05-21 09:55:37 +00:00
+								        # YouTube embed via <data-embed-url="">
 								        {
 								            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 								            'info_dict': {
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								                'id': '4vAffPZIT44',
-												Add a _TEST_

											
										
										
											2014-05-21 09:55:37 +00:00
+								                'ext': 'mp4',
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								                'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								                'uploader': 'Gameloft',
 								                'uploader_id': 'gameloft',
-												[generic] Fix testcases

											
										
										
											2014-09-29 03:12:57 +00:00
+								                'upload_date': '20140828',
 								                'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								            },
 								            'params': {
 								                'skip_download': True,
-												Add a _TEST_

											
										
										
											2014-05-21 09:55:37 +00:00
+								            }
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											2014-08-24 00:02:17 +00:00
+								        },
 								        # Camtasia studio
 								        {
 								            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 								            'playlist': [{
 								                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 								                'info_dict': {
 								                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 								                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 								                    'ext': 'flv',
 								                    'duration': 2235.90,
 								                }
 								            }, {
 								                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 								                'info_dict': {
 								                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 								                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 								                    'ext': 'flv',
 								                    'duration': 2235.93,
 								                }
 								            }],
 								            'info_dict': {
 								                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 								            }
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								        },
 								        # Flowplayer
 								        {
 								            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 								            'md5': '9d65602bf31c6e20014319c7d07fba27',
 								            'info_dict': {
 								                'id': '5123ea6d5e5a7',
 								                'ext': 'mp4',
 								                'age_limit': 18,
 								                'uploader': 'www.handjobhub.com',
-												[generic] Fix test title

											
										
										
											2014-10-26 23:45:15 +00:00
+								                'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								            }
-												[generic] Fix rss under Python 2.x and move test to extractor

											
										
										
											2014-08-25 16:03:01 +00:00
+								        },
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-27 22:58:24 +00:00
+								        # Multiple brightcove videos
 								        # https://github.com/rg3/youtube-dl/issues/2283
 								        {
 								            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 								            'info_dict': {
 								                'id': 'always-never',
 								                'title': 'Always / Never - The New Yorker',
 								            },
 								            'playlist_count': 3,
 								            'params': {
 								                'extract_flat': False,
 								                'skip_download': True,
 								            }
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											2014-09-02 13:19:28 +00:00
+								        },
 								        # MLB embed
 								        {
 								            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 								            'md5': '96f09a37e44da40dd083e12d9a683327',
 								            'info_dict': {
 								                'id': '33322633',
 								                'ext': 'mp4',
 								                'title': 'Ump changes call to ball',
 								                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 								                'duration': 48,
 								                'timestamp': 1401537900,
 								                'upload_date': '20140531',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								            },
 								        },
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								        # Wistia embed
 								        {
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								            'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 								            'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								            'info_dict': {
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'id': '6e2wtrbdaf',
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								                'ext': 'mov',
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 								                'description': 'a Paywall Videos video from Remilon',
 								                'duration': 644.072,
 								                'uploader': 'study.com',
 								                'timestamp': 1459678540,
 								                'upload_date': '20160403',
 								                'filesize': 24687186,
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								            },
 								        },
-												[thoughtworks] wistia support added

											
										
										
											2014-10-23 14:57:13 +00:00
+								        {
 								            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 								            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 								            'info_dict': {
 								                'id': 'uxjb0lwrcz',
 								                'ext': 'mp4',
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'title': 'Conversation about Hexagonal Rails Part 1',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'description': 'a Martin Fowler video from ThoughtWorks',
-												[thoughtworks] wistia support added

											
										
										
											2014-10-23 14:57:13 +00:00
+								                'duration': 1715.0,
-												[thoughtworks] wistia regex modified

											
										
										
											2014-10-23 15:53:56 +00:00
+								                'uploader': 'thoughtworks.wistia.com',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'timestamp': 1401832161,
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'upload_date': '20140603',
-												[generic] Add a test case for direct links with broken HEAD (#4032)

											
										
										
											2014-10-26 19:49:51 +00:00
+								            },
-												[thoughtworks] wistia support added

											
										
										
											2014-10-23 14:57:13 +00:00
+								        },
-												[extractor/generic] Add test for wistia standard embed

											
										
										
											2016-05-20 15:43:36 +00:00
+								        # Wistia standard embed (async)
 								        {
 								            'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 								            'info_dict': {
 								                'id': '807fafadvk',
 								                'ext': 'mp4',
 								                'title': 'Drip Brennan Dunn Workshop',
 								                'description': 'a JV Webinars video from getdrip-1',
 								                'duration': 4986.95,
 								                'timestamp': 1463607249,
-												[extractor/generic] Remove generic id and title from wistia extractionand update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'upload_date': '20160518',
-												[extractor/generic] Add test for wistia standard embed

											
										
										
											2016-05-20 15:43:36 +00:00
+								            },
 								            'params': {
 								                'skip_download': True,
 								            }
 								        },
-												[generic] Allow soundcloud embeds with additional attributes

											
										
										
											2014-10-29 19:27:58 +00:00
+								        # Soundcloud embed
 								        {
 								            'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 								            'info_dict': {
 								                'id': '174391317',
 								                'ext': 'mp3',
 								                'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 								                'uploader': 'Sophos Security',
 								                'title': 'Chet Chat 171 - Oct 29, 2014',
 								                'upload_date': '20141029',
 								            }
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											2014-11-13 15:12:51 +00:00
+								        },
-												[extractor/generic] Add test for #10179

											
										
										
											2016-07-28 15:20:08 +00:00
+								        # Soundcloud multiple embeds
 								        {
 								            'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
 								            'info_dict': {
 								                'id': '52809',
 								                'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
 								            },
 								            'playlist_mincount': 7,
 								        },
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											2014-11-13 15:12:51 +00:00
+								        # Livestream embed
 								        {
 								            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 								            'info_dict': {
 								                'id': '67864563',
 								                'ext': 'flv',
 								                'upload_date': '20141112',
 								                'title': 'Rosetta #CometLanding webcast HL 10',
 								            }
 								        },
-												[generic] Improve Livestream detection (closes #2234)

											
										
										
											2016-05-22 17:39:09 +00:00
+								        # Another Livestream embed, without 'new.' in URL
 								        {
 								            'url': 'https://www.freespeech.org/',
 								            'info_dict': {
 								                'id': '123537347',
 								                'ext': 'mp4',
 								                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 								            },
 								            'params': {
 								                # Live stream
 								                'skip_download': True,
 								            },
 								        },
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											2014-11-25 13:34:19 +00:00
+								        # LazyYT
 								        {
 								            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 								            'info_dict': {
-												Extend various playlist tests

											
										
										
											2015-02-17 23:49:10 +00:00
+								                'id': '1986',
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											2014-11-25 13:34:19 +00:00
+								                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 								            },
 								            'playlist_mincount': 2,
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
+								        },
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											2014-12-12 01:57:36 +00:00
+								        # Cinchcast embed
 								        {
 								            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 								            'info_dict': {
 								                'id': '7141703',
 								                'ext': 'mp3',
 								                'upload_date': '20141126',
 								                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 								            }
 								        },
-												[generic] Add support for Cinerama player (Fixes #4752)

											
										
										
											2015-01-23 11:00:25 +00:00
+								        # Cinerama player
 								        {
 								            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 								            'info_dict': {
 								                'id': '730m_DandD_1901_512k',
 								                'ext': 'mp4',
 								                'uploader': 'www.abc.net.au',
 								                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 								            }
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											2015-01-28 05:08:19 +00:00
+								        },
 								        # embedded viddler video
 								        {
 								            'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 								            'info_dict': {
 								                'id': '4d03aad9',
 								                'ext': 'mp4',
 								                'uploader': 'deadspin',
 								                'title': 'WALL-TO-GORTAT',
 								                'timestamp': 1422285291,
 								                'upload_date': '20150126',
 								            },
 								            'add_ie': ['Viddler'],
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											2015-02-09 09:42:25 +00:00
+								        },
-												[extractor/generic] Add test for Libsyn embed

											
										
										
											2015-03-22 02:20:27 +00:00
+								        # Libsyn embed
 								        {
 								            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 								            'info_dict': {
 								                'id': '3377616',
 								                'ext': 'mp3',
 								                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 								                'description': 'md5:601cb790edd05908957dae8aaa866465',
 								                'upload_date': '20150220',
 								            },
-												[generic] Skip an invalid test

											
										
										
											2016-07-26 05:16:04 +00:00
+								            'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
-												[extractor/generic] Add test for Libsyn embed

											
										
										
											2015-03-22 02:20:27 +00:00
+								        },
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											2015-02-09 09:42:25 +00:00
+								        # jwplayer YouTube
 								        {
 								            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 								            'info_dict': {
 								                'id': 'Mrj4DVp2zeA',
 								                'ext': 'mp4',
-												[generic] Correct test case

Video has been reuploaded / edited

											
										
										
											2015-02-19 01:00:50 +00:00
+								                'upload_date': '20150212',
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											2015-02-09 09:42:25 +00:00
+								                'uploader': 'The National Archives UK',
 								                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 								                'uploader_id': 'NationalArchives08',
 								                'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 								            },
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											2015-02-16 14:45:01 +00:00
+								        },
 								        # rtl.nl embed
 								        {
 								            'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 								            'playlist_mincount': 5,
 								            'info_dict': {
 								                'id': 'aanslagen-kopenhagen',
 								                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 								            }
-												[generic] Add support for Zapiks embeds (#5014)

											
										
										
											2015-02-21 19:39:26 +00:00
+								        },
 								        # Zapiks embed
 								        {
 								            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 								            'info_dict': {
 								                'id': '118046',
 								                'ext': 'mp4',
 								                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 								            }
 								        },
-												[extractor/generic] Improve kaltura embeds support (Closes #6137)

											
										
										
											2015-07-02 15:39:46 +00:00
+								        # Kaltura embed (different embed code)
 								        {
 								            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 								            'info_dict': {
 								                'id': '1_a52wc67y',
 								                'ext': 'flv',
 								                'upload_date': '20150127',
 								                'uploader_id': 'PremierMedia',
 								                'timestamp': int,
 								                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 								            },
 								        },
-												[kaltura] Add test for referrer protected video (#7409)

											
										
										
											2015-11-20 19:40:28 +00:00
+								        # Kaltura embed protected with referrer
 								        {
 								            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 								            'info_dict': {
 								                'id': '1_g4fbemnq',
 								                'ext': 'mp4',
 								                'title': 'Violetta - Achter De Schermen - Ruggero',
 								                'description': 'Achter de schermen met Ruggero',
 								                'timestamp': 1435133761,
 								                'upload_date': '20150624',
 								                'uploader_id': 'echojecka',
 								            },
 								        },
-												[generic] Improve Kaltura detection

Closes #4004

											
										
										
											2016-06-01 10:37:34 +00:00
+								        # Kaltura embed with single quotes
 								        {
 								            'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
 								            'info_dict': {
 								                'id': '0_izeg5utt',
 								                'ext': 'mp4',
 								                'title': '35871',
 								                'timestamp': 1355743100,
 								                'upload_date': '20121217',
 								                'uploader_id': 'batchUser',
 								            },
 								            'add_ie': ['Kaltura'],
 								        },
-												[extractor/generic] Improve kaltura embed detection (Closes #9911)

											
										
										
											2016-06-26 21:11:53 +00:00
+								        {
 								            # Kaltura embedded via quoted entry_id
 								            'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
 								            'info_dict': {
 								                'id': '0_utuok90b',
 								                'ext': 'mp4',
 								                'title': '06_matthew_brender_raj_dutt',
 								                'timestamp': 1466638791,
 								                'upload_date': '20160622',
 								            },
 								            'add_ie': ['Kaltura'],
 								            'expected_warnings': [
 								                'Could not send HEAD request'
 								            ],
 								            'params': {
 								                'skip_download': True,
 								            }
 								        },
-												[eagleplatform] Add support for embeds

											
										
										
											2015-03-07 16:22:57 +00:00
+								        # Eagle.Platform embed (generic URL)
 								        {
 								            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
-												[eagleplatform] Checking direct HTTP links

Sometimes they fail with 404

											
										
										
											2016-04-25 14:48:17 +00:00
+								            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
-												[eagleplatform] Add support for embeds

											
										
										
											2015-03-07 16:22:57 +00:00
+								            'info_dict': {
 								                'id': '227304',
 								                'ext': 'mp4',
 								                'title': 'Навальный вышел на свободу',
 								                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 87,
 								                'view_count': int,
 								                'age_limit': 0,
 								            },
 								        },
-												[eagleplatform] Add support for ClipYou embeds

											
										
										
											2015-03-07 16:34:44 +00:00
+								        # ClipYou (Eagle.Platform) embed (custom URL)
 								        {
 								            'url': 'http://muz-tv.ru/play/7129/',
-												[eagleplatform] Checking direct HTTP links

Sometimes they fail with 404

											
										
										
											2016-04-25 14:48:17 +00:00
+								            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
-												[eagleplatform] Add support for ClipYou embeds

											
										
										
											2015-03-07 16:34:44 +00:00
+								            'info_dict': {
 								                'id': '12820',
 								                'ext': 'mp4',
 								                'title': "'O Sole Mio",
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 216,
 								                'view_count': int,
 								            },
 								        },
-												[pladform] Add support for embeds

											
										
										
											2015-03-08 12:07:10 +00:00
+								        # Pladform embed
 								        {
 								            'url': 'http://muz-tv.ru/kinozal/view/7400/',
 								            'info_dict': {
 								                'id': '100183293',
 								                'ext': 'mp4',
-												[generic] Update pladform embed test

											
										
										
											2015-04-16 15:37:15 +00:00
+								                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
-												[pladform] Add support for embeds

											
										
										
											2015-03-08 12:07:10 +00:00
+								                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 694,
 								                'age_limit': 0,
 								            },
 								        },
-												[generic] Add test for playwire embed (#5430)

											
										
										
											2015-04-15 16:13:01 +00:00
+								        # Playwire embed
 								        {
 								            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 								            'info_dict': {
 								                'id': '3519514',
 								                'ext': 'mp4',
 								                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 								                'thumbnail': 're:^https?://.*\.png$',
 								                'duration': 45.115,
 								            },
 								        },
-												[generic] Add support for 5min embeds (#5310)

											
										
										
											2015-03-29 01:57:37 +00:00
+								        # 5min embed
 								        {
 								            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 								            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 								            'info_dict': {
 								                'id': '518726732',
 								                'ext': 'mp4',
 								                'title': 'Facebook Creates "On This Day" | Crunch Report',
 								            },
 								        },
-												[extractor/generic] Add test for svt embed

											
										
										
											2015-05-08 18:27:37 +00:00
+								        # SVT embed
 								        {
 								            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 								            'info_dict': {
 								                'id': '2900353',
 								                'ext': 'flv',
 								                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 								                'duration': 27,
 								                'age_limit': 0,
 								            },
 								        },
-												[generic] Add tests for Crooks and Liars embeds

											
										
										
											2015-04-11 14:26:42 +00:00
+								        # Crooks and Liars embed
 								        {
 								            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 								            'info_dict': {
 								                'id': '8RUoRhRi',
 								                'ext': 'mp4',
 								                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 								                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 								                'timestamp': 1428207000,
 								                'upload_date': '20150405',
 								                'uploader': 'Heather',
 								            },
 								        },
 								        # Crooks and Liars external embed
 								        {
 								            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 								            'info_dict': {
 								                'id': 'MTE3MjUtMzQ2MzA',
 								                'ext': 'mp4',
 								                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 								                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 								                'timestamp': 1265032391,
 								                'upload_date': '20100201',
 								                'uploader': 'Heather',
 								            },
 								        },
-												[generic] Add working NBC Sports vplayer test

											
										
										
											2015-03-31 14:11:14 +00:00
+								        # NBC Sports vplayer embed
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											2015-03-30 19:36:09 +00:00
+								        {
-												[generic] Add working NBC Sports vplayer test

											
										
										
											2015-03-31 14:11:14 +00:00
+								            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											2015-03-30 19:36:09 +00:00
+								            'info_dict': {
-												[generic] Add working NBC Sports vplayer test

											
										
										
											2015-03-31 14:11:14 +00:00
+								                'id': 'ln7x1qSThw4k',
 								                'ext': 'flv',
 								                'title': "PFT Live: New leader in the 'new-look' defense",
 								                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'uploader': 'NBCU-SPORTS',
 								                'upload_date': '20140107',
 								                'timestamp': 1389118457,
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											2015-03-30 19:36:09 +00:00
+								            },
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								        },
-												[generic] Detect NBC News embeds

											
										
										
											2016-06-10 05:32:59 +00:00
+								        # NBC News embed
 								        {
 								            'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
 								            'md5': '1aa589c675898ae6d37a17913cf68d66',
 								            'info_dict': {
 								                'id': '701714499682',
 								                'ext': 'mp4',
 								                'title': 'PREVIEW: On Assignment: David Letterman',
 								                'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
 								            },
 								        },
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								        # UDN embed
 								        {
-												[generic] Update the UDNEmbed test case

											
										
										
											2016-06-01 11:23:44 +00:00
+								            'url': 'https://video.udn.com/news/300346',
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											2015-04-14 05:10:10 +00:00
+								            'md5': 'fd2060e988c326991037b9aff9df21a6',
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								            'info_dict': {
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											2015-04-14 05:10:10 +00:00
+								                'id': '300346',
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								                'ext': 'mp4',
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											2015-04-14 05:10:10 +00:00
+								                'title': '中一中男師變性 全校師生力挺',
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								                'thumbnail': 're:^https?://.*\.jpg$',
-												[generic] Update the UDNEmbed test case

											
										
										
											2016-06-01 11:23:44 +00:00
+								            },
 								            'params': {
 								                # m3u8 download
 								                'skip_download': True,
 								            },
-												[generic] Support another type of Ooyala embedded video

											
										
										
											2015-04-14 04:45:43 +00:00
+								        },
 								        # Ooyala embed
 								        {
 								            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 								            'info_dict': {
 								                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 								                'ext': 'mp4',
-												[ooyala] extract domain,handle errors and change related tests

											
										
										
											2015-10-16 15:02:40 +00:00
+								                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
-												[generic] Support another type of Ooyala embedded video

											
										
										
											2015-04-14 04:45:43 +00:00
+								                'title': 'This is what separates the Excel masters from the wannabes',
-												[ooyala] fix duration scale

											
										
										
											2015-12-04 15:18:02 +00:00
+								                'duration': 191.933,
-												[generic] Support another type of Ooyala embedded video

											
										
										
											2015-04-14 04:45:43 +00:00
+								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            }
-												[generic] Extract videos from SMIL manifests (closes #5145 and fixes #5135)

											
										
										
											2015-04-16 09:16:11 +00:00
+								        },
-												[brightcove] Allow single quotes in Brightcove URLs (fixes #5901)

											
										
										
											2015-06-07 07:29:17 +00:00
+								        # Brightcove URL in single quotes
 								        {
 								            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 								            'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 								            'info_dict': {
 								                'id': '4255764656001',
 								                'ext': 'mp4',
 								                'title': 'SN Presents: Russell Martin, World Citizen',
 								                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 								                'uploader': 'Rogers Sportsnet',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'uploader_id': '1704050871',
 								                'upload_date': '20150525',
 								                'timestamp': 1432570283,
-												[brightcove] Allow single quotes in Brightcove URLs (fixes #5901)

											
										
										
											2015-06-07 07:29:17 +00:00
+								            },
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								        },
 								        # Dailymotion Cloud video
 								        {
 								            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
-												[generic] Fix test_Generic_76

Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658

											
										
										
											2016-06-26 03:54:52 +00:00
+								            'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								            'info_dict': {
-												[generic] Fix test_Generic_76

Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658

											
										
										
											2016-06-26 03:54:52 +00:00
+								                'id': 'x2uy8t3',
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								                'ext': 'mp4',
-												[generic] Fix test_Generic_76

Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658

											
										
										
											2016-06-26 03:54:52 +00:00
+								                'title': 'Sauvons les abeilles ! - Le débat',
 								                'description': 'md5:d9082128b1c5277987825d684939ca26',
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								                'thumbnail': 're:^https?://.*\.jpe?g$',
-												[generic] Fix test_Generic_76

Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658

											
										
										
											2016-06-26 03:54:52 +00:00
+								                'timestamp': 1434970506,
 								                'upload_date': '20150622',
 								                'uploader': 'Public Sénat',
 								                'uploader_id': 'xa9gza',
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								            }
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											2015-06-22 07:02:53 +00:00
+								        },
-												[extractor/generic] Add test for OnionStudios embeds

											
										
										
											2015-06-24 17:23:16 +00:00
+								        # OnionStudios embed
 								        {
 								            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 								            'info_dict': {
 								                'id': '2855',
 								                'ext': 'mp4',
 								                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 								                'thumbnail': 're:^https?://.*\.jpe?g$',
 								                'uploader': 'ClickHole',
 								                'uploader_id': 'clickhole',
 								            }
 								        },
-												[extractor/generic] Add test for snagfilms embeds

											
										
										
											2015-06-27 12:28:10 +00:00
+								        # SnagFilms embed
 								        {
 								            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 								            'info_dict': {
 								                'id': '74849a00-85a9-11e1-9660-123139220831',
 								                'ext': 'mp4',
 								                'title': '#whilewewatch',
 								            }
 								        },
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											2015-06-22 07:02:53 +00:00
+								        # AdobeTVVideo embed
 								        {
 								            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 								            'md5': '43662b577c018ad707a63766462b1e87',
 								            'info_dict': {
 								                'id': '2456',
 								                'ext': 'mp4',
 								                'title': 'New experience with Acrobat DC',
 								                'description': 'New experience with Acrobat DC',
 								                'duration': 248.667,
 								            },
-												[generic] Add test for screenwavemedia embed

											
										
										
											2015-08-29 13:12:38 +00:00
+								        },
 								        # ScreenwaveMedia embed
 								        {
 								            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
 								            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
 								            'info_dict': {
 								                'id': 'cinemasnob-55d26273809dd',
 								                'ext': 'mp4',
 								                'title': 'cinemasnob',
 								            },
-												[brightcove] add support for brightcove in page embed(fixes #6824)

											
										
										
											2015-09-11 03:46:21 +00:00
+								        },
 								        # BrightcoveInPageEmbed embed
 								        {
 								            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
 								            'info_dict': {
 								                'id': '4238694884001',
 								                'ext': 'flv',
 								                'title': 'Tabletop: Dread, Last Thoughts',
 								                'description': 'Tabletop: Dread, Last Thoughts',
 								                'duration': 51690,
 								            },
-												[generic] Extract M3U8 formats (closes #7582)

											
										
										
											2015-11-21 08:43:01 +00:00
+								        },
-												[generic] Add a test case for brightcove embed

Closes #8862

											
										
										
											2016-03-26 10:30:43 +00:00
+								        # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
 								        # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
 								        {
 								            'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
 								            'info_dict': {
 								                'id': '4785848093001',
 								                'ext': 'mp4',
 								                'title': 'The Cardinal Pell Interview',
 								                'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
 								                'uploader': 'GlobeCast Australia - GlobeStream',
-												[ThePlatform] Fix tests failed since 79ba9140dc8fcf5883b7473596e8f20cba6b479f

											
										
										
											2016-04-24 12:44:52 +00:00
+								                'uploader_id': '2733773828001',
 								                'upload_date': '20160304',
 								                'timestamp': 1457083087,
-												[generic] Add a test case for brightcove embed

Closes #8862

											
										
										
											2016-03-26 10:30:43 +00:00
+								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            },
 								        },
-												[arte.tv:embed] Extended support (#2620)

											
										
										
											2016-04-11 11:17:11 +00:00
+								        # Another form of arte.tv embed
 								        {
 								            'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
 								            'md5': '850bfe45417ddf221288c88a0cffe2e2',
 								            'info_dict': {
 								                'id': '030273-562_PLUS7-F',
 								                'ext': 'mp4',
 								                'title': 'ARTE Reportage - Nulle part, en France',
 								                'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
 								                'upload_date': '20160409',
 								            },
 								        },
-												[generic] Add support for LiveLeak embeds

											
										
										
											2016-03-31 18:42:55 +00:00
+								        # LiveLeak embed
 								        {
 								            'url': 'http://www.wykop.pl/link/3088787/',
 								            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
 								            'info_dict': {
 								                'id': '874_1459135191',
 								                'ext': 'mp4',
 								                'title': 'Man shows poor quality of new apartment building',
 								                'description': 'The wall is like a sand pile.',
 								                'uploader': 'Lake8737',
 								            }
 								        },
-												[genric] Eliminate duplicated video URLs (closes #6562)

											
										
										
											2016-05-22 14:22:27 +00:00
+								        # Duplicated embedded video URLs
 								        {
 								            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
 								            'info_dict': {
 								                'id': '149298443_480_16c25b74_2',
 								                'ext': 'mp4',
 								                'title': 'vs. Blue Orange Spring Game',
 								                'uploader': 'www.hudl.com',
 								            },
 								        },
-												[extractor/generic] Change twitter:player embeds priority to lowest (Closes #10090)

											
										
										
											2016-07-16 08:59:43 +00:00
+								        # twitter:player:stream embed
 								        {
 								            'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
 								            'info_dict': {
 								                'id': 'master',
 								                'ext': 'mp4',
 								                'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
 								                'uploader': 'www.rtl.be',
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            },
 								        },
-												[generic] add generic support for twitter:player embeds

											
										
										
											2016-06-30 11:01:30 +00:00
+								        # twitter:player embed
 								        {
 								            'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
 								            'md5': 'a3e0df96369831de324f0778e126653c',
 								            'info_dict': {
 								                'id': '4909620399001',
 								                'ext': 'mp4',
 								                'title': 'What Do Black Holes Sound Like?',
 								                'description': 'what do black holes sound like',
 								                'upload_date': '20160524',
 								                'uploader_id': '29913724001',
 								                'timestamp': 1464107587,
 								                'uploader': 'TheAtlantic',
 								            },
 								            'add_ie': ['BrightcoveLegacy'],
-												[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

											
										
										
											2016-07-02 13:33:23 +00:00
+								        },
 								        # Facebook <iframe> embed
 								        {
 								            'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
-												[generic] Add MD5 checksums

											
										
										
											2016-07-02 13:57:06 +00:00
+								            'md5': 'fbcde74f534176ecb015849146dd3aee',
-												[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

											
										
										
											2016-07-02 13:33:23 +00:00
+								            'info_dict': {
 								                'id': '599637780109885',
 								                'ext': 'mp4',
 								                'title': 'Facebook video #599637780109885',
 								            },
 								        },
 								        # Facebook API embed
 								        {
 								            'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
-												[generic] Add MD5 checksums

											
										
										
											2016-07-02 13:57:06 +00:00
+								            'md5': 'a47372ee61b39a7b90287094d447d94e',
-												[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

											
										
										
											2016-07-02 13:33:23 +00:00
+								            'info_dict': {
 								                'id': '10153467542406923',
 								                'ext': 'mp4',
 								                'title': 'Facebook video #10153467542406923',
 								            },
-												[generic] Support Wordpress "YouTube Video Importer" plugin

Closes #9938

											
										
										
											2016-07-02 13:50:17 +00:00
+								        },
 								        # Wordpress "YouTube Video Importer" plugin
 								        {
 								            'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
-												[generic] Add MD5 checksums

											
										
										
											2016-07-02 13:57:06 +00:00
+								            'md5': 'd16797741b560b485194eddda8121b48',
-												[generic] Support Wordpress "YouTube Video Importer" plugin

Closes #9938

											
										
										
											2016-07-02 13:50:17 +00:00
+								            'info_dict': {
 								                'id': 'HNTXWDXV9Is',
 								                'ext': 'mp4',
 								                'title': 'Blue Devils Drumline Stanford lot 2016',
 								                'upload_date': '20160627',
 								                'uploader_id': 'GENOCIDE8GENERAL10',
 								                'uploader': 'cylus cyrus',
 								            },
 								        },
-												[kaltura] add support videos stored on custom kaltura servers(closes #5557)

											
										
										
											2016-07-04 16:57:44 +00:00
+								        {
 								            # video stored on custom kaltura server
 								            'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
 								            'md5': '537617d06e64dfed891fa1593c4b30cc',
 								            'info_dict': {
 								                'id': '0_1iotm5bh',
 								                'ext': 'mp4',
 								                'title': 'Elecciones británicas: 5 lecciones para Rajoy',
 								                'description': 'md5:435a89d68b9760b92ce67ed227055f16',
 								                'uploader_id': 'videos.expansion@el-mundo.net',
 								                'upload_date': '20150429',
 								                'timestamp': 1430303472,
 								            },
 								            'add_ie': ['Kaltura'],
 								        },
-												[generic] Add the test. Closes #1638

											
										
										
											2016-07-09 06:39:01 +00:00
+								        {
 								            # Non-standard Vimeo embed
 								            'url': 'https://openclassrooms.com/courses/understanding-the-web',
 								            'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
 								            'info_dict': {
 								                'id': '148867247',
 								                'ext': 'mp4',
 								                'title': 'Understanding the web - Teaser',
 								                'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
 								                'upload_date': '20151214',
 								                'uploader': 'OpenClassrooms',
 								                'uploader_id': 'openclassrooms',
 								            },
 								            'add_ie': ['Vimeo'],
 								        },
-												[extractor/generic] Add vimeo embed that requires Referer passed

											
										
										
											2016-09-12 14:49:31 +00:00
+								        {
 								            # generic vimeo embed that requires original URL passed as Referer
 								            'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
 								            'only_matching': True,
 								        },
-												[extractor/generic] Add support for arkena embeds

											
										
										
											2016-07-23 10:56:48 +00:00
+								        {
 								            'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
 								            'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
 								            'info_dict': {
 								                'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
 								                'ext': 'mp4',
 								                'title': 'Big Buck Bunny',
 								                'description': 'Royalty free test video',
 								                'timestamp': 1432816365,
 								                'upload_date': '20150528',
 								                'is_live': False,
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								            'add_ie': [ArkenaIE.ie_key()],
 								        },
-												[vbox7:generic] Add support for vbox7 embeds

											
										
										
											2016-08-16 18:02:59 +00:00
+								        {
 								            'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
 								            'info_dict': {
 								                'id': '1c7141f46c',
 								                'ext': 'mp4',
 								                'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
 								            },
 								            'params': {
 								                'skip_download': True,
 								            },
 								            'add_ie': [Vbox7IE.ie_key()],
 								        },
-												[DBTV:generic] Add support for embeds

											
										
										
											2016-08-17 10:45:24 +00:00
+								        {
 								            # DBTV embeds
 								            'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
-												[generic] Fix dbtv test (Closes #10364)

											
										
										
											2016-08-18 14:35:41 +00:00
+								            'info_dict': {
 								                'id': '43254897',
 								                'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
 								            },
-												[DBTV:generic] Add support for embeds

											
										
										
											2016-08-17 10:45:24 +00:00
+								            'playlist_mincount': 3,
 								        },
-												[extractor/generic] Properly comment out a test

											
										
										
											2016-07-09 01:32:55 +00:00
+								        # {
 								        #     # TODO: find another test
 								        #     # http://schema.org/VideoObject
 								        #     'url': 'https://flipagram.com/f/nyvTSJMKId',
 								        #     'md5': '888dcf08b7ea671381f00fab74692755',
 								        #     'info_dict': {
 								        #         'id': 'nyvTSJMKId',
 								        #         'ext': 'mp4',
 								        #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
 								        #         'description': '#love for cats.',
 								        #         'timestamp': 1461244995,
 								        #         'upload_date': '20160421',
 								        #     },
 								        #     'params': {
 								        #         'force_generic_extractor': True,
 								        #     },
 								        # }
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
+								    ]
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
 								    def report_following_redirect(self, new_url):
 								        """Print a notice on screen that extraction continues at new_url."""
 								        message = '[redirect] Following redirect to %s' % new_url
 								        self._downloader.to_screen(message)
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											2014-02-20 12:14:05 +00:00
+								    def _extract_rss(self, url, video_id, doc):
 								        playlist_title = doc.find('./channel/title').text
 								        playlist_desc_el = doc.find('./channel/description')
 								        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-												[generic] Parse RSS enclosure URLs (Fixes #5091)

											
										
										
											2015-03-02 14:21:11 +00:00
+								        entries = []
 								        for it in doc.findall('./channel/item'):
 								            next_url = xpath_text(it, 'link', fatal=False)
 								            if not next_url:
 								                enclosure_nodes = it.findall('./enclosure')
 								                for e in enclosure_nodes:
 								                    next_url = e.attrib.get('url')
 								                    if next_url:
 								                        break
 								            if not next_url:
 								                continue
 								            entries.append({
 								                '_type': 'url',
 								                'url': next_url,
 								                'title': it.find('title').text,
 								            })
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											2014-02-20 12:14:05 +00:00
 								        return {
 								            '_type': 'playlist',
 								            'id': url,
 								            'title': playlist_title,
 								            'description': playlist_desc,
 								            'entries': entries,
 								        }
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											2014-08-24 00:02:17 +00:00
+								    def _extract_camtasia(self, url, video_id, webpage):
 								        """ Returns None if no camtasia video can be found. """
 								        camtasia_cfg = self._search_regex(
 								            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 								            webpage, 'camtasia configuration file', default=None)
 								        if camtasia_cfg is None:
 								            return None
 								        title = self._html_search_meta('DC.title', webpage, fatal=True)
 								        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 								        camtasia_cfg = self._download_xml(
 								            camtasia_url, video_id,
 								            note='Downloading camtasia configuration',
 								            errnote='Failed to download camtasia configuration')
 								        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 								        entries = []
 								        for n in fileset_node.getchildren():
 								            url_n = n.find('./uri')
 								            if url_n is None:
 								                continue
 								            entries.append({
 								                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 								                'title': '%s - %s' % (title, n.tag),
 								                'url': compat_urlparse.urljoin(url, url_n.text),
 								                'duration': float_or_none(n.find('./duration').text),
 								            })
 								        return {
 								            '_type': 'playlist',
 								            'entries': entries,
 								            'title': title,
 								        }
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								    def _real_extract(self, url):
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											2014-04-29 23:46:06 +00:00
+								        if url.startswith('//'):
 								            return {
 								                '_type': 'url',
-												[soundcloud/generic] Add support for playlists

											
										
										
											2014-05-05 01:12:41 +00:00
+								                'url': self.http_scheme() + url,
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											2014-04-29 23:46:06 +00:00
+								            }
-												[generic] If the url doesn't specify the protocol, then try to extract prepending 'http://'

											
										
										
											2013-09-06 16:39:35 +00:00
+								        parsed_url = compat_urlparse.urlparse(url)
 								        if not parsed_url.scheme:
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
+								            default_search = self._downloader.params.get('default_search')
 								            if default_search is None:
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											2014-07-29 15:17:43 +00:00
+								                default_search = 'fixup_error'
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											2014-07-29 15:17:43 +00:00
+								            if default_search in ('auto', 'auto_warning', 'fixup_error'):
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
+								                if '/' in url:
 								                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 								                    return self.url_result('http://' + url)
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											2014-07-29 15:17:43 +00:00
+								                elif default_search != 'fixup_error':
-												[generic] Warn before fallback to automatic search

											
										
										
											2014-03-30 13:57:31 +00:00
+								                    if default_search == 'auto_warning':
-												[generic] Abort if user passes in URL "url" (#2942)

											
										
										
											2014-05-19 15:10:11 +00:00
+								                        if re.match(r'^(?:url|URL)$', url):
 								                            raise ExtractorError(
 								                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 								                                expected=True)
 								                        else:
 								                            self._downloader.report_warning(
-												[generic] Set default-search to error

This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds

											
										
										
											2014-07-06 09:22:44 +00:00
+								                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
+								                    return self.url_result('ytsearch:' + url)
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											2014-07-29 15:17:43 +00:00
 								            if default_search in ('error', 'fixup_error'):
-												[generic] Set default-search to error

This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds

											
										
										
											2014-07-06 09:22:44 +00:00
+								                raise ExtractorError(
-												Fix all PEP8 issues except E501

											
										
										
											2014-11-23 21:21:46 +00:00
+								                    '%r is not a valid URL. '
 								                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 								                    % (url, url), expected=True)
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
+								            else:
-												[generic] Allow --default-search without colon

											
										
										
											2014-10-23 19:13:45 +00:00
+								                if ':' not in default_search:
 								                    default_search += ':'
-												Add new --default-search option (#2193)

											
										
										
											2014-01-22 13:16:43 +00:00
+								                return self.url_result(default_search + url)
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											2014-08-24 02:47:18 +00:00
 								        url, smuggled_data = unsmuggle_url(url)
 								        force_videoid = None
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											2014-09-28 10:14:16 +00:00
+								        is_intentional = smuggled_data and smuggled_data.get('to_generic')
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											2014-08-24 02:47:18 +00:00
+								        if smuggled_data and 'force_videoid' in smuggled_data:
 								            force_videoid = smuggled_data['force_videoid']
 								            video_id = force_videoid
 								        else:
-												[generic,commonprotocols] Move mms suuport from GenericIE

And use _generic_* helpers in those extractors

											
										
										
											2016-10-07 11:22:30 +00:00
+								            video_id = self._generic_id(url)
-												[generic] Support direct MMS links (closes #10838)

											
										
										
											2016-10-07 09:50:45 +00:00
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								        self.to_screen('%s: Requesting header' % video_id)
-												[generic] Output something before making network requests

											
										
										
											2013-12-27 07:38:42 +00:00
-												[generic] Use default opener for HEAD request (Fixes #3528)

											
										
										
											2014-08-24 04:58:11 +00:00
+								        head_req = HEADRequest(url)
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								        head_response = self._request_webpage(
-												[generic] Use default opener for HEAD request (Fixes #3528)

											
										
										
											2014-08-24 04:58:11 +00:00
+								            head_req, video_id,
 								            note=False, errnote='Could not send HEAD request to %s' % url,
 								            fatal=False)
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								        if head_response is not False:
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
+								            # Check for redirect
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								            new_url = head_response.geturl()
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
+								            if url != new_url:
 								                self.report_following_redirect(new_url)
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											2014-08-24 02:47:18 +00:00
+								                if force_videoid:
 								                    new_url = smuggle_url(
 								                        new_url, {'force_videoid': force_videoid})
-												[generic] Do not use compatibility result fallback

											
										
										
											2013-12-17 11:04:33 +00:00
+								                return self.url_result(new_url)
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								        full_response = None
 								        if head_response is False:
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 16:18:17 +00:00
+								            request = sanitized_Request(url)
-												[extractor/generic] Force Accept-Encoding to any for extraction pass

											
										
										
											2015-05-30 18:44:54 +00:00
+								            request.add_header('Accept-Encoding', '*')
 								            full_response = self._request_webpage(request, video_id)
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								            head_response = full_response
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								        info_dict = {
 								            'id': video_id,
-												[generic,commonprotocols] Move mms suuport from GenericIE

And use _generic_* helpers in those extractors

											
										
										
											2016-10-07 11:22:30 +00:00
+								            'title': self._generic_title(url),
-												[extractor/generic] Simplify upload_date extraction

											
										
										
											2016-03-18 16:41:16 +00:00
+								            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								        }
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								        # Check for direct link to a video
-												[extractor/generic] Force Content-Type to lowecase

											
										
										
											2016-03-18 15:50:44 +00:00
+								        content_type = head_response.headers.get('Content-Type', '').lower()
-												[extractor/generic] Properly extract format id from Content-Type

Fixes extraction for cases like: audio/x-mpegURL; charset=utf-8

											
										
										
											2016-03-18 15:50:10 +00:00
+								        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											2014-10-26 16:05:44 +00:00
+								        if m:
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								            format_id = m.group('format_id')
 								            if format_id.endswith('mpegurl'):
-												[generic] extract m3u8 formats when mpegurl content type detected

											
										
										
											2016-02-04 00:25:36 +00:00
+								                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								            elif format_id == 'f4m':
 								                formats = self._extract_f4m_formats(url, video_id)
-												[generic] extract m3u8 formats when mpegurl content type detected

											
										
										
											2016-02-04 00:25:36 +00:00
+								            else:
 								                formats = [{
 								                    'format_id': m.group('format_id'),
 								                    'url': url,
 								                    'vcodec': 'none' if m.group('type') == 'audio' else None
 								                }]
-												[extractor/generic] Fix direct link semantics

											
										
										
											2016-03-18 16:43:07 +00:00
+								                info_dict['direct'] = True
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								            self._sort_formats(formats)
-												[extractor/generic] Fix direct link semantics

											
										
										
											2016-03-18 16:43:07 +00:00
+								            info_dict['formats'] = formats
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								            return info_dict
-												Add support for direct links to a video (#1973)

											
										
										
											2013-12-17 11:33:55 +00:00
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											2014-09-28 10:14:16 +00:00
+								        if not self._downloader.params.get('test', False) and not is_intentional:
-												[extractor/generic] Clarify generic extraction warning

											
										
										
											2015-06-23 21:08:24 +00:00
+								            force = self._downloader.params.get('force_generic_extractor', False)
 								            self._downloader.report_warning(
 								                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											2014-09-28 10:14:16 +00:00
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
+								        if not full_response:
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 16:18:17 +00:00
+								            request = sanitized_Request(url)
-												[extractor/generic] Force Accept-Encoding to any for extraction pass

											
										
										
											2015-05-30 18:44:54 +00:00
+								            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 								            # making it impossible to download only a chunk of the file (yet we need only the first
 								            # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
 								            # that would always result in downloading the whole file, which is not desirable.
 								            # Therefore for the extraction pass we have to override Accept-Encoding to any in order
 								            # to accept raw bytes and be able to download only a chunk.
 								            # It would probably be better to solve this by checking Content-Type for application/octet-stream
 								            # after the HEAD request finishes, but it is not clear whether we can rely on this.
 								            request.add_header('Accept-Encoding', '*')
 								            full_response = self._request_webpage(request, video_id)
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
-												[extractor/generic] Detect m3u playlists served without proper Content-Type

											
										
										
											2016-03-18 16:45:28 +00:00
+								        first_bytes = full_response.read(512)
 								        # Is it an M3U playlist?
-												[extractor/generic] Fix missing byte literal prefix

											
										
										
											2016-03-18 23:43:43 +00:00
+								        if first_bytes.startswith(b'#EXTM3U'):
-												[extractor/generic] Detect m3u playlists served without proper Content-Type

											
										
										
											2016-03-18 16:45:28 +00:00
+								            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								            self._sort_formats(info_dict['formats'])
-												[extractor/generic] Detect m3u playlists served without proper Content-Type

											
										
										
											2016-03-18 16:45:28 +00:00
+								            return info_dict
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
+								        # Maybe it's a direct link to a video?
 								        # Be careful not to download the whole thing!
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											2015-01-23 00:21:30 +00:00
+								        if not is_html(first_bytes):
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
+								            self._downloader.report_warning(
 								                'URL could be a direct video link, returning it as such.')
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								            info_dict.update({
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
+								                'direct': True,
 								                'url': url,
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								            })
 								            return info_dict
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											2014-11-26 09:44:39 +00:00
 								        webpage = self._webpage_read_content(
 								            full_response, url, video_id, prefix=first_bytes)
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								        self.report_extraction(video_id)
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											2013-11-18 12:28:26 +00:00
-												[extractor/generic] Detect DASH manifests and extract mpd formats

											
										
										
											2016-02-06 13:35:32 +00:00
+								        # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											2014-02-20 12:14:05 +00:00
+								        try:
-												[compat] compat_etree_fromstring: also decode the text attribute

Deletes parse_xml from utils, because it also does it.

											
										
										
											2015-10-26 15:41:24 +00:00
+								            doc = compat_etree_fromstring(webpage.encode('utf-8'))
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											2014-02-20 12:14:05 +00:00
+								            if doc.tag == 'rss':
 								                return self._extract_rss(url, video_id, doc)
-												[extractor/generic] Add support for ISM manifests

											
										
										
											2016-11-01 20:01:13 +00:00
+								            elif doc.tag == 'SmoothStreamingMedia':
 								                info_dict['formats'] = self._parse_ism_formats(doc, url)
 								                self._sort_formats(info_dict['formats'])
 								                return info_dict
-												[extractor/generic] Improve generic SMIL detection

											
										
										
											2015-08-01 19:13:59 +00:00
+								            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								                smil = self._parse_smil(doc, url, video_id)
 								                self._sort_formats(smil['formats'])
 								                return smil
-												[extractor/generic] Add support for xspf playlists

											
										
										
											2015-08-09 13:43:42 +00:00
+								            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
 								                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
-												[extractor/generic] Detect DASH manifests and extract mpd formats

											
										
										
											2016-02-06 13:35:32 +00:00
+								            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								                info_dict['formats'] = self._parse_mpd_formats(
-												Refactor fragments interface and dash segments downloader
- Eliminate segment_urls and initialization_url
+ Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly)
* Rewrite dashsegments downloader to use fragments data
* Improve generic mpd extraction

											
										
										
											2016-09-17 13:35:22 +00:00
+								                    doc, video_id,
 								                    mpd_base_url=full_response.geturl().rpartition('/')[0],
 								                    mpd_url=url)
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								                self._sort_formats(info_dict['formats'])
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								                return info_dict
 								            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
 								                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								                self._sort_formats(info_dict['formats'])
-												[extractor/generic] Extract f4m formats and refactor common info

											
										
										
											2016-03-12 21:17:25 +00:00
+								                return info_dict
-												[generic] Fix on python 2.6

`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.

											
										
										
											2014-02-21 15:59:10 +00:00
+								        except compat_xml_parse_error:
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											2014-02-20 12:14:05 +00:00
+								            pass
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											2014-08-24 00:02:17 +00:00
+								        # Is it a Camtasia project?
 								        camtasia_res = self._extract_camtasia(url, video_id, webpage)
 								        if camtasia_res is not None:
 								            return camtasia_res
-												[generic] Add comment for unescaping webpage contents

											
										
										
											2014-03-14 21:38:49 +00:00
+								        # Sometimes embedded video player is hidden behind percent encoding
 								        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 								        # Unescaping the whole page allows to handle those cases in a generic way
-												Generic: use compat_urllib_parse_unquote to prevent utf8 mangling
of the entire page in python 2.

-requires- fixed compat_urllib_parse_unquote

example - the following will save with a mangled playlist title,
 instead of the kanji for 'tsunami'. This affects all utf8encoded
 urls as well

youtube-dl -f18 -o '%(playlist_title)s-%(title)s.%(ext)s' \
  https://gist.githubusercontent.com/atomicdryad/fcb97465e6060fc519e1/raw/61c14c1e3a4985471dcf56c281d24d7e781a4e0e/tsunami.html

											
										
										
											2015-07-15 20:30:47 +00:00
+								        webpage = compat_urllib_parse_unquote(webpage)
-												[generic] Unescape webpage contents
											
										
										
											2014-02-24 16:44:31 +00:00
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											2013-11-18 12:28:26 +00:00
+								        # it's tempting to parse this further, but you would
 								        # have to take into account all the variations like
 								        #   Video Title - Site Name
 								        #   Site Name | Video Title
 								        #   Video Title - Tagline | Site Name
 								        # and so on and so forth; it's just not practical
-												[extractor/generic] Improve 3qsdn embeds support (Closes #9453)

											
										
										
											2016-05-14 17:58:25 +00:00
+								        video_title = self._og_search_title(
 								            webpage, default=None) or self._html_search_regex(
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            r'(?s)<title>(.*?)</title>', webpage, 'video title',
 								            default='video')
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								        # Try to detect age limit automatically
 								        age_limit = self._rta_search(webpage)
 								        # And then there are the jokers who advertise that they use RTA,
 								        # but actually don't.
 								        AGE_LIMIT_MARKERS = [
 								            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 								        ]
 								        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 								            age_limit = 18
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
+								        # video uploader is domain name
 								        video_uploader = self._search_regex(
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											2013-11-18 12:28:26 +00:00
-												[extractor/generic] Improve 3qsdn embeds support (Closes #9453)

											
										
										
											2016-05-14 17:58:25 +00:00
+								        video_description = self._og_search_description(webpage, default=None)
 								        video_thumbnail = self._og_search_thumbnail(webpage, default=None)
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								        # Helper method
-												[generic] Make getter None by default

											
										
										
											2015-01-02 14:54:30 +00:00
+								        def _playlist_from_matches(matches, getter=None, ie=None):
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											2014-09-24 09:05:14 +00:00
+								            urlrs = orderedSet(
-												[generic] Make getter None by default

											
										
										
											2015-01-02 14:54:30 +00:00
+								                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											2014-09-24 09:05:14 +00:00
+								                for m in matches)
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								            return self.playlist_result(
 								                urlrs, playlist_id=video_id, playlist_title=video_title)
-												[generic] Clarify Brightcove Legacy Studio comment

											
										
										
											2015-11-14 00:03:32 +00:00
+								        # Look for Brightcove Legacy Studio embeds
-												[brightcove] Rename extractor to brightcove legacy

Old embedding approaches are now "Legacy Studio"

											
										
										
											2015-11-13 23:54:16 +00:00
+								        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
-												[generic] Add support for multiple brightcove URLs (Fixes #2283)

											
										
										
											2014-02-03 14:19:40 +00:00
+								        if bc_urls:
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											2014-01-06 00:47:52 +00:00
+								            self.to_screen('Brightcove video detected.')
-												[generic] Add support for multiple brightcove URLs (Fixes #2283)

											
										
										
											2014-02-03 14:19:40 +00:00
+								            entries = [{
 								                '_type': 'url',
 								                'url': smuggle_url(bc_url, {'Referer': url}),
-												Rename all references to legacy studio Brightcove extractor

											
										
										
											2015-11-14 00:05:46 +00:00
+								                'ie_key': 'BrightcoveLegacy'
-												[generic] Add support for multiple brightcove URLs (Fixes #2283)

											
										
										
											2014-02-03 14:19:40 +00:00
+								            } for bc_url in bc_urls]
 								            return {
 								                '_type': 'playlist',
 								                'title': video_title,
 								                'id': video_id,
 								                'entries': entries,
 								            }
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											2013-07-10 15:49:11 +00:00
-												[generic] Extract Brightcove New Studio embeds

											
										
										
											2015-11-14 00:03:07 +00:00
+								        # Look for Brightcove New Studio embeds
 								        bc_urls = BrightcoveNewIE._extract_urls(webpage)
 								        if bc_urls:
 								            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
-												[brightcove] add support for brightcove in page embed (fixes #6824)

											
										
										
											2015-09-11 03:46:21 +00:00
-												[extractor/generic] Add support for theplatform embeds (Closes #8636, closes #9476)

											
										
										
											2016-05-22 00:52:39 +00:00
+								        # Look for ThePlatform embeds
 								        tp_urls = ThePlatformIE._extract_urls(webpage)
 								        if tp_urls:
 								            return _playlist_from_matches(tp_urls, ie='ThePlatform')
-												[extractor/generic] Add support for vessel embeds (Closes #7083)

											
										
										
											2016-06-08 21:02:27 +00:00
+								        # Look for Vessel embeds
 								        vessel_urls = VesselIE._extract_urls(webpage)
 								        if vessel_urls:
 								            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											2015-02-16 14:45:01 +00:00
+								        # Look for embedded rtl.nl player
 								        matches = re.findall(
-												[generic] Improve rtl.nl embeds detection (Closes #5950)

											
										
										
											2015-06-11 13:04:12 +00:00
+								            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											2015-02-16 14:45:01 +00:00
+								            webpage)
 								        if matches:
 								            return _playlist_from_matches(matches, ie='RtlNl')
-												[generic] Add support for multiple vimeo embeds (Closes #10862)

											
										
										
											2016-10-06 16:22:52 +00:00
+								        vimeo_urls = VimeoIE._extract_urls(url, webpage)
 								        if vimeo_urls:
 								            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
-												[vimeo/generic] Add support for embedded SWF vimeo videos

											
										
										
											2013-12-22 02:34:13 +00:00
-												[extractor/generic] Support vid.me embeds

											
										
										
											2015-07-20 10:49:53 +00:00
+								        vid_me_embed_url = self._search_regex(
 								            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
 								            webpage, 'vid.me embed', default=None)
 								        if vid_me_embed_url is not None:
 								            return self.url_result(vid_me_embed_url, 'Vidme')
-												Check for embedded YouTube player (Fixes #1616)

											
										
										
											2013-10-18 09:44:57 +00:00
+								        # Look for embedded YouTube player
-												[generic] Support YouTube swf embed (Fixes #2010)

											
										
										
											2013-12-19 19:44:30 +00:00
+								        matches = re.findall(r'''(?x)
-												[generic] Add support for <embed YouTube

											
										
										
											2014-06-09 20:06:45 +00:00
+								            (?:
 								                <iframe[^>]+?src=|
-												Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url'

Conflicts:
	youtube_dl/extractor/generic.py

											
										
										
											2014-08-22 15:40:36 +00:00
+								                data-video-url=|
-												[generic] Add support for <embed YouTube

											
										
										
											2014-06-09 20:06:45 +00:00
+								                <embed[^>]+?src=|
-												[generic] Allow new SWFObject()-style imports

This embed style is used on http://www.bitburger-open.de/ , but that is not included as a test case since the format is likely to be temporary.

											
										
										
											2014-10-26 13:15:48 +00:00
+								                embedSWF\(?:\s*|
 								                new\s+SWFObject\(
-												[generic] Add support for <embed YouTube

											
										
										
											2014-06-09 20:06:45 +00:00
+								            )
 								            (["\'])
-												[generic/youtube] Recognize youtube nocookie embeds (Closes #3713)

											
										
										
											2014-09-10 11:29:20 +00:00
+								                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-												[youtube] Support for embedded /p players (Fixes #3821)

											
										
										
											2014-09-24 23:58:49 +00:00
+								                (?:embed|v|p)/.+?)
-												[generic] Support YouTube swf embed (Fixes #2010)

											
										
										
											2013-12-19 19:44:30 +00:00
+								            \1''', webpage)
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											2013-11-18 12:28:26 +00:00
+								        if matches:
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								            return _playlist_from_matches(
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											2014-09-24 09:05:14 +00:00
+								                matches, lambda m: unescapeHTML(m[1]))
-												Check for embedded YouTube player (Fixes #1616)

											
										
										
											2013-10-18 09:44:57 +00:00
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											2014-11-25 13:34:19 +00:00
+								        # Look for lazyYT YouTube embed
 								        matches = re.findall(
 								            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 								        if matches:
 								            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
-												[generic] Support Wordpress "YouTube Video Importer" plugin

Closes #9938

											
										
										
											2016-07-02 13:50:17 +00:00
+								        # Look for Wordpress "YouTube Video Importer" plugin
 								        matches = re.findall(r'''(?x)<div[^>]+
 								            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
 								            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
 								        if matches:
 								            return _playlist_from_matches(matches, lambda m: m[-1])
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 15:37:54 +00:00
+								        matches = DailymotionIE._extract_urls(webpage)
-												[generic] Find embedded dailymotion videos (Fixes #1848)

											
										
										
											2013-12-01 00:21:33 +00:00
+								        if matches:
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 15:37:54 +00:00
+								            return _playlist_from_matches(matches)
-												[generic] Find embedded dailymotion videos (Fixes #1848)

											
										
										
											2013-12-01 00:21:33 +00:00
-												[generic] Support embedded Dailymotion playlists (fixes #3822)

											
										
										
											2014-10-02 18:42:45 +00:00
+								        # Look for embedded Dailymotion playlist player (#3822)
 								        m = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
 								        if m:
 								            playlists = re.findall(
 								                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
 								            if playlists:
 								                return _playlist_from_matches(
 								                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
+								        # Look for embedded Wistia player
 								        match = re.search(
-												[generic/wistia] Improve regex

											
										
										
											2014-10-23 16:03:07 +00:00
+								            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
+								        if match:
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											2014-10-17 22:52:55 +00:00
+								            embed_url = self._proto_relative_url(
 								                unescapeHTML(match.group('url')))
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
+								            return {
 								                '_type': 'url_transparent',
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											2014-10-17 22:52:55 +00:00
+								                'url': embed_url,
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
+								                'ie_key': 'Wistia',
 								                'uploader': video_uploader,
 								            }
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											2014-10-17 22:52:55 +00:00
+								        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								        if match:
 								            return {
 								                '_type': 'url_transparent',
-												[extractor/generic] Remove generic id and title from wistia extraction and update tests

											
										
										
											2016-05-20 15:55:35 +00:00
+								                'url': 'wistia:%s' % match.group('id'),
-												[wistia] Use API and make more generic

											
										
										
											2014-09-20 00:02:11 +00:00
+								                'ie_key': 'Wistia',
 								                'uploader': video_uploader,
 								            }
-												[wistia] Add extractor

											
										
										
											2013-12-06 08:15:04 +00:00
-												[extractor/generic] Add support for async wistia embeds (Closes #9549)

											
										
										
											2016-05-20 15:33:31 +00:00
+								        match = re.search(
 								            r'''(?sx)
 								                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
 								                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
 								            ''', webpage)
 								        if match:
 								            return self.url_result(self._proto_relative_url(
 								                'wistia:%s' % match.group('id')), 'Wistia')
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											2015-05-08 18:23:35 +00:00
+								        # Look for SVT player
 								        svt_url = SVTIE._extract_url(webpage)
 								        if svt_url:
 								            return self.url_result(svt_url, 'SVT')
-												[condenast|generic] Add support for condenast embeds (Fixes #2783)

											
										
										
											2014-04-21 03:47:52 +00:00
+								        # Look for embedded condenast player
 								        matches = re.findall(
 								            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
 								            webpage)
 								        if matches:
 								            return {
 								                '_type': 'playlist',
 								                'entries': [{
 								                    '_type': 'url',
 								                    'ie_key': 'CondeNast',
 								                    'url': ma,
 								                } for ma in matches],
 								                'title': video_title,
 								                'id': video_id,
 								            }
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											2013-10-27 13:40:25 +00:00
+								        # Look for Bandcamp pages with custom domain
 								        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
 								        if mobj is not None:
 								            burl = unescapeHTML(mobj.group(1))
-												[bandcamp] add support for albums (reported in #1270)

											
										
										
											2013-11-22 15:05:14 +00:00
+								            # Don't set the extractor because it can be a track url or an album
 								            return self.url_result(burl)
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											2013-10-27 13:40:25 +00:00
-												Add support for embedded vevo player (Fixes #1957)

											
										
										
											2013-12-16 20:45:21 +00:00
+								        # Look for embedded Vevo player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											2015-01-28 05:08:19 +00:00
 								        # Look for embedded Viddler player
-												[generic] Improve some regexes

											
										
										
											2015-01-28 17:07:37 +00:00
+								        mobj = re.search(
 								            r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
 								            webpage)
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											2015-01-28 05:08:19 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												Add support for embedded vevo player (Fixes #1957)

											
										
										
											2013-12-16 20:45:21 +00:00
-												[generic] Add support for nytimes embeds (Closes #5234)

											
										
										
											2015-03-19 15:26:57 +00:00
+								        # Look for NYTimes player
 								        mobj = re.search(
 								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[extractor/generic] Support Libsyn embeds

											
										
										
											2015-03-22 02:18:13 +00:00
+								        # Look for Libsyn player
 								        mobj = re.search(
 								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								        # Look for Ooyala videos
-												[extractor/generic] Expand ooyala regex (Closes #6485)

											
										
										
											2015-08-07 19:55:59 +00:00
+								        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											2015-02-04 13:33:37 +00:00
+								                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-												[generic] Support another type of Ooyala embedded video

											
										
										
											2015-04-14 04:45:43 +00:00
+								                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
 								                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
+								        if mobj is not None:
-												[ooyala] extract domain, handle errors and change related tests

											
										
										
											2015-10-16 15:02:40 +00:00
+								            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											2013-12-19 19:28:52 +00:00
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											2015-02-04 13:33:37 +00:00
+								        # Look for multiple Ooyala embeds on SBN network websites
 								        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
 								        if mobj is not None:
 								            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
 								            if embeds:
 								                return _playlist_from_matches(
-												[ooyala] extract domain, handle errors and change related tests

											
										
										
											2015-10-16 15:02:40 +00:00
+								                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											2015-02-04 13:33:37 +00:00
-												[aparat] Add support (Fixes #2012)

											
										
										
											2013-12-20 16:05:28 +00:00
+								        # Look for Aparat videos
-												[generic] Be more relaxed when looking for aparat embeds (Fixes #2784)

											
										
										
											2014-04-21 10:37:41 +00:00
+								        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
-												[aparat] Add support (Fixes #2012)

											
										
										
											2013-12-20 16:05:28 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group(1), 'Aparat')
-												[mpora] Add support (Fixes #2096)

											
										
										
											2014-01-07 07:07:46 +00:00
+								        # Look for MPORA videos
-												Improve some regexes for embedded players
											
										
										
											2014-01-29 21:26:46 +00:00
+								        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
-												[mpora] Add support (Fixes #2096)

											
										
										
											2014-01-07 07:07:46 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group(1), 'Mpora')
-												[novamov] Remove superfluous tabs
											
										
										
											2014-01-08 01:11:46 +00:00
-												[generic] Generalize novamov based embeds

											
										
										
											2014-04-05 10:20:05 +00:00
+								        # Look for embedded NovaMov-based player
-												[novamov] Add embedded player support

											
										
										
											2014-01-08 01:07:11 +00:00
+								        mobj = re.search(
-												[generic] Support pagespeed_iframe for NovaMov embeds

											
										
										
											2014-05-17 11:12:12 +00:00
+								            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
-												[generic] Generalize novamov based embeds

											
										
										
											2014-04-05 10:20:05 +00:00
+								                    (?P<url>http://(?:(?:embed|www)\.)?
 								                        (?:novamov\.com|
 								                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
 								                           videoweed\.(?:es|com)|
 								                           movshare\.(?:net|sx|ag)|
 								                           divxstage\.(?:eu|net|ch|co|at|ag))
 								                        /embed\.php.+?)\1''', webpage)
-												[novamov] Add embedded player support

											
										
										
											2014-01-08 01:07:11 +00:00
+								        if mobj is not None:
-												[generic] Generalize novamov based embeds

											
										
										
											2014-04-05 10:20:05 +00:00
+								            return self.url_result(mobj.group('url'))
-												[generic] Add support for videoweed embeds

											
										
										
											2014-04-05 08:49:45 +00:00
-												[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

											
										
										
											2014-01-21 17:10:14 +00:00
+								        # Look for embedded Facebook player
-												[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

											
										
										
											2016-07-02 13:33:23 +00:00
+								        facebook_url = FacebookIE._extract_url(webpage)
 								        if facebook_url is not None:
 								            return self.url_result(facebook_url, 'Facebook')
-												[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

											
										
										
											2014-01-21 17:10:14 +00:00
-												[vk] Add support for embedded videos (Closes #2473)
											
										
										
											2014-02-28 16:51:54 +00:00
+								        # Look for embedded VK player
 								        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'VK')
-												[extractor/generic] Add support for ok embeds (#8619)

											
										
										
											2016-02-21 03:51:54 +00:00
+								        # Look for embedded Odnoklassniki player
 								        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Odnoklassniki')
-												[generic] Add support for ivi.ru embedded player

											
										
										
											2014-06-29 13:18:23 +00:00
+								        # Look for embedded ivi player
 								        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Ivi')
-												[huffpost] Add support

											
										
										
											2014-01-27 04:47:30 +00:00
+								        # Look for embedded Huffington Post player
 								        mobj = re.search(
-												Improve some regexes for embedded players
											
										
										
											2014-01-29 21:26:46 +00:00
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
-												[huffpost] Add support

											
										
										
											2014-01-27 04:47:30 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'HuffPost')
-												Add support for embed.ly

											
										
										
											2014-02-24 00:15:51 +00:00
+								        # Look for embed.ly
 								        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
 								        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
 								        if mobj is not None:
-												[extractor/generic] Use compat_urllib_parse_unquote

											
										
										
											2015-07-17 17:39:32 +00:00
+								            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
-												Add support for embed.ly

											
										
										
											2014-02-24 00:15:51 +00:00
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											2014-03-11 15:51:36 +00:00
+								        # Look for funnyordie embed
 								        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
 								        if matches:
-												[generic] Simplify playlist support (#2948)

											
										
										
											2014-08-22 16:19:56 +00:00
+								            return _playlist_from_matches(
 								                matches, getter=unescapeHTML, ie='FunnyOrDie')
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											2014-03-11 15:51:36 +00:00
-												[generic] Add support for BBC iPlayer embeds (Closes #4619)

											
										
										
											2015-01-02 14:46:17 +00:00
+								        # Look for BBC iPlayer embed
 								        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
 								        if matches:
-												[generic] Generalize BBC iPlayer playlist extraction

											
										
										
											2015-01-02 14:55:09 +00:00
+								            return _playlist_from_matches(matches, ie='BBCCoUk')
-												[generic] Add support for BBC iPlayer embeds (Closes #4619)

											
										
										
											2015-01-02 14:46:17 +00:00
-												[generic] Add support for embedded rutv player

											
										
										
											2014-03-16 19:00:31 +00:00
+								        # Look for embedded RUTV player
 								        rutv_url = RUTVIE._extract_url(webpage)
 								        if rutv_url:
 								            return self.url_result(rutv_url, 'RUTV')
-												[extractor/generic] Add support for tvc embeds

											
										
										
											2015-06-12 10:22:46 +00:00
+								        # Look for embedded TVC player
-												[extractor/generic] Rename tvc embed url variable

											
										
										
											2015-06-12 12:15:30 +00:00
+								        tvc_url = TVCIE._extract_url(webpage)
 								        if tvc_url:
 								            return self.url_result(tvc_url, 'TVC')
-												[extractor/generic] Add support for tvc embeds

											
										
										
											2015-06-12 10:22:46 +00:00
-												[generic] Add support for sportbox embeds

											
										
										
											2015-05-15 17:09:34 +00:00
+								        # Look for embedded SportBox player
 								        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
 								        if sportbox_urls:
 								            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
-												[tumblr] Add support for pornhub embeds (Closes #5963)

											
										
										
											2015-06-12 21:39:14 +00:00
+								        # Look for embedded PornHub player
-												[extractor/generic] Add support for pornhub embeds

											
										
										
											2015-06-12 21:36:16 +00:00
+								        pornhub_url = PornHubIE._extract_url(webpage)
 								        if pornhub_url:
 								            return self.url_result(pornhub_url, 'PornHub')
-												[generic] Add support for xhamster embeds

											
										
										
											2015-06-21 17:11:25 +00:00
+								        # Look for embedded XHamster player
 								        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
 								        if xhamster_urls:
 								            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
-												[extractor/generic] Add support for tnaflix network embeds (Closes #7505)

											
										
										
											2016-02-27 11:15:49 +00:00
+								        # Look for embedded TNAFlixNetwork player
 								        tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
 								        if tnaflix_urls:
 								            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
-												[extractor/generic] Add support for drtuber embeds (closes #11098)

											
										
										
											2016-11-06 14:33:51 +00:00
+								        # Look for embedded DrTuber player
 								        drtuber_urls = DrTuberIE._extract_urls(webpage)
 								        if drtuber_urls:
 								            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
-												[extractor/generic] Add support for tvigle embeds

											
										
										
											2015-06-12 12:37:09 +00:00
+								        # Look for embedded Tvigle player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Tvigle')
-												[generic] Run TED detection before JW Player detection

Otherwise it overwrites the `mobj` variable.

											
										
										
											2014-03-22 09:20:44 +00:00
+								        # Look for embedded TED player
 								        mobj = re.search(
-												[generic] PEP8

											
										
										
											2015-01-05 17:16:47 +00:00
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
-												[generic] Run TED detection before JW Player detection

Otherwise it overwrites the `mobj` variable.

											
										
										
											2014-03-22 09:20:44 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'TED')
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											2014-04-04 14:23:09 +00:00
+								        # Look for embedded Ustream videos
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Ustream')
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											2014-03-24 21:01:47 +00:00
+								        # Look for embedded arte.tv player
 								        mobj = re.search(
-												[arte.tv:embed] Extended support (#2620)

											
										
										
											2016-04-11 11:17:11 +00:00
+								            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											2014-03-24 21:01:47 +00:00
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
-												[extractor/generic] Add support for francetv embeds

											
										
										
											2015-07-18 16:56:00 +00:00
+								        # Look for embedded francetv player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[smotri] Modernize and add support for embedded videos (Closes #2585)

											
										
										
											2014-03-28 12:58:49 +00:00
+								        # Look for embedded smotri.com player
 								        smotri_url = SmotriIE._extract_url(webpage)
 								        if smotri_url:
 								            return self.url_result(smotri_url, 'Smotri')
-												[extractor/generic:myvi] Add support for myvi embeds

											
										
										
											2015-07-09 18:25:36 +00:00
+								        # Look for embedded Myvi.ru player
-												[myvi:embed] Rename to myvi

											
										
										
											2015-07-09 18:27:44 +00:00
+								        myvi_url = MyviIE._extract_url(webpage)
-												[extractor/generic:myvi] Add support for myvi embeds

											
										
										
											2015-07-09 18:25:36 +00:00
+								        if myvi_url:
 								            return self.url_result(myvi_url)
-												Fix typos

Closes #8200.

											
										
										
											2016-01-10 15:17:47 +00:00
+								        # Look for embedded soundcloud player
-												[extractor/generic] Extract all soundcloud embeds (Closes #10179)

											
										
										
											2016-07-28 15:15:15 +00:00
+								        soundcloud_urls = SoundcloudIE._extract_urls(webpage)
 								        if soundcloud_urls:
 								            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
-												[soundcloud/generic] Add support for playlists

											
										
										
											2014-05-05 01:12:41 +00:00
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
+								        # Look for embedded mtvservices player
-												[extractor/generic] Use _extract_url for mtvservices

											
										
										
											2015-09-26 13:47:20 +00:00
+								        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
 								        if mtvservices_url:
 								            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
-												[generic] Extract mtvservices embedded videos

											
										
										
											2014-06-22 19:38:04 +00:00
-												[yahoo] Add support for embedded videos (Closes #3525)

											
										
										
											2014-08-16 06:56:22 +00:00
+								        # Look for embedded yahoo player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Yahoo')
-												[sbs] Add new extractor (Fixes #3566)

											
										
										
											2014-08-23 13:20:49 +00:00
+								        # Look for embedded sbs.com.au player
 								        mobj = re.search(
-												[generic] Improve SBS detection (Fixes #4899)

											
										
										
											2015-02-09 13:46:10 +00:00
+								            r'''(?x)
 								            (?:
 								                <meta\s+property="og:video"\s+content=|
 								                <iframe[^>]+?src=
 								            )
 								            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
-												[sbs] Add new extractor (Fixes #3566)

											
										
										
											2014-08-23 13:20:49 +00:00
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'SBS')
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											2014-12-12 01:57:36 +00:00
+								        # Look for embedded Cinchcast player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Cinchcast')
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											2014-09-02 13:19:28 +00:00
+								        mobj = re.search(
-												[generic] Improve MLB iframe regex

											
										
										
											2014-10-31 21:01:58 +00:00
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											2014-09-02 13:19:28 +00:00
+								            webpage)
-												[generic] Detect more MLB videos (fixes #5443)

											
										
										
											2015-05-03 18:20:07 +00:00
+								        if not mobj:
 								            mobj = re.search(
 								                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
 								                webpage)
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											2014-09-02 13:19:28 +00:00
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'MLB')
-												[condenast] Add support for embedded videos (Closes #3929)

											
										
										
											2014-10-13 12:59:35 +00:00
+								        mobj = re.search(
-												[extractor/generic] Add support for condenast script embeds (Closes #6885, closes #6991)

											
										
										
											2015-09-26 23:55:48 +00:00
+								            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
-												[condenast] Add support for embedded videos (Closes #3929)

											
										
										
											2014-10-13 12:59:35 +00:00
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											2014-11-13 15:12:51 +00:00
+								        mobj = re.search(
-												[generic] Improve Livestream detection (closes #2234)

											
										
										
											2016-05-22 17:39:09 +00:00
+								            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											2014-11-13 15:12:51 +00:00
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Livestream')
-												[generic] Add support for Zapiks embeds (#5014)

											
										
										
											2015-02-21 19:39:26 +00:00
+								        # Look for Zapiks embed
 								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Zapiks')
-												[generic] Support dynamic Kaltura embeds (#5016) (#5073)

											
										
										
											2015-02-26 22:34:19 +00:00
+								        # Look for Kaltura embeds
-												[extractor/generic] Use _extract_url for kaltura embeds (Closes #9922)

											
										
										
											2016-06-27 15:45:26 +00:00
+								        kaltura_url = KalturaIE._extract_url(webpage)
 								        if kaltura_url:
 								            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
-												[generic] Support dynamic Kaltura embeds (#5016) (#5073)

											
										
										
											2015-02-26 22:34:19 +00:00
-												[eagleplatform] Add support for embeds

											
										
										
											2015-03-07 16:22:57 +00:00
+								        # Look for Eagle.Platform embeds
-												[eagleplatform] Improve embed detection and extract in separate routine (Closes #9926)

											
										
										
											2016-06-29 16:01:34 +00:00
+								        eagleplatform_url = EaglePlatformIE._extract_url(webpage)
 								        if eagleplatform_url:
 								            return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
-												[eagleplatform] Add support for embeds

											
										
										
											2015-03-07 16:22:57 +00:00
-												[eagleplatform] Add support for ClipYou embeds

											
										
										
											2015-03-07 16:34:44 +00:00
+								        # Look for ClipYou (uses Eagle.Platform) embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
 								        if mobj is not None:
 								            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-												[pladform] Add support for embeds

											
										
										
											2015-03-08 12:07:10 +00:00
+								        # Look for Pladform embeds
-												[extractor/generic] Use _extract_url for pladform

											
										
										
											2015-12-07 16:03:21 +00:00
+								        pladform_url = PladformIE._extract_url(webpage)
 								        if pladform_url:
 								            return self.url_result(pladform_url)
-												[pladform] Add support for embeds

											
										
										
											2015-03-08 12:07:10 +00:00
-												[extractor/generic] Add support for videomore embeds

											
										
										
											2015-12-29 17:58:23 +00:00
+								        # Look for Videomore embeds
 								        videomore_url = VideomoreIE._extract_url(webpage)
 								        if videomore_url:
 								            return self.url_result(videomore_url)
-												[generic] Add support for playwire embeds (Closes #5430)

											
										
										
											2015-04-15 16:10:08 +00:00
+								        # Look for Playwire embeds
 								        mobj = re.search(
 								            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[generic] Add support for 5min embeds (#5310)

											
										
										
											2015-03-29 01:57:37 +00:00
+								        # Look for 5min embeds
 								        mobj = re.search(
 								            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
 								        if mobj is not None:
 								            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
-												[generic] Add support for Crooks and Liars embeds

											
										
										
											2015-04-11 14:20:20 +00:00
+								        # Look for Crooks and Liars embeds
 								        mobj = re.search(
 								            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											2015-03-30 19:36:09 +00:00
+								        # Look for NBC Sports VPlayer embeds
 								        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
 								        if nbc_sports_url:
 								            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-												[generic] Detect NBC News embeds

											
										
										
											2016-06-10 05:32:59 +00:00
+								        # Look for NBC News embeds
 								        nbc_news_embed_url = re.search(
 								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
 								        if nbc_news_embed_url:
 								            return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
-												add google drive embeds
											
										
										
											2015-06-29 07:01:30 +00:00
+								        # Look for Google Drive embeds
-												[googledrive] Modernize

											
										
										
											2015-12-21 02:05:34 +00:00
+								        google_drive_url = GoogleDriveIE._extract_url(webpage)
-												add google drive embeds
											
										
										
											2015-06-29 07:01:30 +00:00
+								        if google_drive_url:
 								            return self.url_result(google_drive_url, 'GoogleDrive')
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								        # Look for UDN embeds
 								        mobj = re.search(
-												[UDNEmbed] Fix generic UDN pages

Closes #7547

											
										
										
											2015-11-19 14:22:57 +00:00
+								            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
+								        if mobj is not None:
 								            return self.url_result(
-												[utils] Remove url_infer_protocol

											
										
										
											2015-04-08 13:39:34 +00:00
+								                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-												[udn] Add new extractor

											
										
										
											2015-04-08 09:26:51 +00:00
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											2015-04-20 19:18:38 +00:00
+								        # Look for Senate ISVP iframe
 								        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
 								        if senate_isvp_url:
-												[generic] Fix typo

											
										
										
											2015-05-15 17:23:51 +00:00
+								            return self.url_result(senate_isvp_url, 'SenateISVP')
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											2015-04-20 19:18:38 +00:00
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 13:30:34 +00:00
+								        # Look for Dailymotion Cloud videos
 								        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
 								        if dmcloud_url:
 								            return self.url_result(dmcloud_url, 'DailymotionCloud')
-												[extractor/generic] Add support for OnionStudios embeds (Closes #5841)

											
										
										
											2015-06-24 17:19:50 +00:00
+								        # Look for OnionStudios embeds
 								        onionstudios_url = OnionStudiosIE._extract_url(webpage)
 								        if onionstudios_url:
 								            return self.url_result(onionstudios_url)
-												[viewlift] replace SnagFilms extractors

- add support for other sites that use the same logic
- improve format extraction and sorting

											
										
										
											2016-04-29 10:14:42 +00:00
+								        # Look for ViewLift embeds
 								        viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
 								        if viewlift_url:
 								            return self.url_result(viewlift_url)
-												[extractor/generic] Add support for snagfilms embeds

											
										
										
											2015-06-27 12:26:14 +00:00
-												[makertv] improve extraction

											
										
										
											2015-12-21 03:24:58 +00:00
+								        # Look for JWPlatform embeds
 								        jwplatform_url = JWPlatformIE._extract_url(webpage)
 								        if jwplatform_url:
 								            return self.url_result(jwplatform_url, 'JWPlatform')
-												[generic] Look for ScreenwaveMedia embeds

											
										
										
											2015-08-29 05:55:20 +00:00
+								        # Look for ScreenwaveMedia embeds
-												[generic] Use screenwavemedia embed pattern

											
										
										
											2015-08-29 13:07:31 +00:00
+								        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
-												[generic] Look for ScreenwaveMedia embeds

											
										
										
											2015-08-29 05:55:20 +00:00
+								        if mobj is not None:
-												[generic] Use screenwavemedia embed pattern

											
										
										
											2015-08-29 13:07:31 +00:00
+								            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-												[generic] Look for ScreenwaveMedia embeds

											
										
										
											2015-08-29 05:55:20 +00:00
-												[ultimedia] Rename to digiteka

											
										
										
											2016-01-19 15:51:46 +00:00
+								        # Look for Digiteka embeds
 								        digiteka_url = DigitekaIE._extract_url(webpage)
 								        if digiteka_url:
 								            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
-												[jukebox] remove extractor and handle it using generic extractor

											
										
										
											2015-09-25 09:52:48 +00:00
-												[extractor/generic] Add support for arkena embeds

											
										
										
											2016-07-23 10:56:48 +00:00
+								        # Look for Arkena embeds
 								        arkena_url = ArkenaIE._extract_url(webpage)
 								        if arkena_url:
 								            return self.url_result(arkena_url, ArkenaIE.ie_key())
-												[generic] Add support for Limelight API

											
										
										
											2016-01-30 19:45:56 +00:00
+								        # Look for Limelight embeds
 								        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
 								        if mobj:
 								            lm = {
 								                'Media': 'media',
 								                'Channel': 'channel',
 								                'ChannelList': 'channel_list',
 								            }
 								            return self.url_result('limelight:%s:%s' % (
 								                lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											2015-06-22 07:02:53 +00:00
+								        # Look for AdobeTVVideo embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(
 								                self._proto_relative_url(unescapeHTML(mobj.group(1))),
 								                'AdobeTVVideo')
-												[generic] Support Vine embeds (#8817)

											
										
										
											2016-03-23 15:55:08 +00:00
+								        # Look for Vine embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(
 								                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
-												[vodplatform] Add new extractor

											
										
										
											2016-08-04 08:37:27 +00:00
+								        # Look for VODPlatform embeds
 								        mobj = re.search(
-												[generic,vodplatform] improve embed regex

											
										
										
											2016-08-29 06:56:54 +00:00
+								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
-												[vodplatform] Add new extractor

											
										
										
											2016-08-04 08:37:27 +00:00
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(
-												[generic,vodplatform] improve embed regex

											
										
										
											2016-08-29 06:56:54 +00:00
+								                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
-												[vodplatform] Add new extractor

											
										
										
											2016-08-04 08:37:27 +00:00
-												[mangomolo] add support for Mangomolo embeds

											
										
										
											2016-09-16 18:31:39 +00:00
+								        # Look for Mangomolo embeds
 								        mobj = re.search(
-												[mangomolo] fix domain regex

											
										
										
											2016-09-17 07:11:01 +00:00
+								            r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
-												[mangomolo] add support for Mangomolo embeds

											
										
										
											2016-09-16 18:31:39 +00:00
+								                (?:
 								                    video\?.*?\bid=(?P<video_id>\d+)|
 								                    index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
 								                ).+?)\1''', webpage)
 								        if mobj is not None:
 								            info = {
 								                '_type': 'url_transparent',
 								                'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
 								                'title': video_title,
 								                'description': video_description,
 								                'thumbnail': video_thumbnail,
 								                'uploader': video_uploader,
 								            }
 								            video_id = mobj.group('video_id')
 								            if video_id:
 								                info.update({
 								                    'ie_key': 'MangomoloVideo',
 								                    'id': video_id,
 								                })
 								            else:
 								                info.update({
 								                    'ie_key': 'MangomoloLive',
 								                    'id': mobj.group('channel_id'),
 								                })
 								            return info
-												[generic] Extract Instagram embeds (#8817)

											
										
										
											2016-03-24 08:32:27 +00:00
+								        # Look for Instagram embeds
 								        instagram_embed_url = InstagramIE._extract_embed_url(webpage)
 								        if instagram_embed_url is not None:
-												[extractor/generic] Improve instagram embeds (Closes #9213)

											
										
										
											2016-04-16 16:39:20 +00:00
+								            return self.url_result(
 								                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
-												[generic] Extract Instagram embeds (#8817)

											
										
										
											2016-03-24 08:32:27 +00:00
-												[generic] Add support for LiveLeak embeds

											
										
										
											2016-03-31 18:42:55 +00:00
+								        # Look for LiveLeak embeds
 								        liveleak_url = LiveLeakIE._extract_url(webpage)
 								        if liveleak_url:
 								            return self.url_result(liveleak_url, 'LiveLeak')
-												[extractor/generic:3qsdn] Add support for embeds

											
										
										
											2016-05-14 17:40:34 +00:00
+								        # Look for 3Q SDN embeds
 								        threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
 								        if threeqsdn_url:
-												[extractor/generic] Improve 3qsdn embeds support (Closes #9453)

											
										
										
											2016-05-14 17:58:25 +00:00
+								            return {
 								                '_type': 'url_transparent',
 								                'ie_key': ThreeQSDNIE.ie_key(),
 								                'url': self._proto_relative_url(threeqsdn_url),
 								                'title': video_title,
 								                'description': video_description,
 								                'thumbnail': video_thumbnail,
 								                'uploader': video_uploader,
 								            }
-												[extractor/generic:3qsdn] Add support for embeds

											
										
										
											2016-05-14 17:40:34 +00:00
-												[vbox7:generic] Add support for vbox7 embeds

											
										
										
											2016-08-16 18:02:59 +00:00
+								        # Look for VBOX7 embeds
 								        vbox7_url = Vbox7IE._extract_url(webpage)
 								        if vbox7_url:
 								            return self.url_result(vbox7_url, Vbox7IE.ie_key())
-												[DBTV:generic] Add support for embeds

											
										
										
											2016-08-17 10:45:24 +00:00
+								        # Look for DBTV embeds
 								        dbtv_urls = DBTVIE._extract_urls(webpage)
 								        if dbtv_urls:
 								            return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
-												[extractor/generic] Detect schema.org/VideoObject embeds

											
										
										
											2016-07-08 20:29:07 +00:00
+								        # Looking for http://schema.org/VideoObject
 								        json_ld = self._search_json_ld(
-												[extractor/generic] Add proper default to _search_json_ld call

											
										
										
											2016-08-08 15:48:33 +00:00
+								            webpage, video_id, default={}, expected_type='VideoObject')
 								        if json_ld.get('url'):
-												[extractor/generic] Detect schema.org/VideoObject embeds

											
										
										
											2016-07-08 20:29:07 +00:00
+								            info_dict.update({
 								                'title': video_title or info_dict['title'],
 								                'description': video_description,
 								                'thumbnail': video_thumbnail,
 								                'age_limit': age_limit
 								            })
 								            info_dict.update(json_ld)
 								            return info_dict
-												[utils] Fix xattr error handling

											
										
										
											2016-10-01 19:03:41 +00:00
+								        # Look for HTML5 media
 								        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
 								        if entries:
 								            for entry in entries:
 								                entry.update({
 								                    'id': video_id,
 								                    'title': video_title,
 								                })
 								                self._sort_formats(entry['formats'])
 								            return self.playlist_result(entries)
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								        def check_video(vurl):
 								            # Decide whether a candidate URL scraped from the page is worth
 								            # treating as a video. Returns True for anything YoutubeIE accepts;
 								            # otherwise True only when the URL path contains a dot and the
 								            # extension reported by determine_ext() is not one of the listed
 								            # non-video types (flash, images, subtitle formats, javascript).
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											2015-02-09 09:42:25 +00:00
+								            if YoutubeIE.suitable(vurl):
 								                return True
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            vpath = compat_urlparse.urlparse(vurl).path
 								            vext = determine_ext(vpath)
-												[utils] Fix xattr error handling

											
										
										
											2016-10-01 19:03:41 +00:00
+								            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
 								        def filter_video(urls):
 								            # Keep, in their original order, only the URLs check_video accepts.
 								            return [candidate for candidate in urls if check_video(candidate)]
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								        # Start with something easy: JW Player in SWFObject
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								        found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												[generic] Support gorillavid.in

Previously, we were a little bit over-eager and got a random swf file.
Fixes #2084.

											
										
										
											2014-01-05 04:34:06 +00:00
+								            # Look for gorilla-vid style embedding
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            found = filter_video(re.findall(r'''(?sx)
-												[generic] Improve jwplayer detection (Fixes #2731)

											
										
										
											2014-04-21 14:16:53 +00:00
+								                (?:
 								                    jw_plugins|
 								                    JWPlayerOptions|
 								                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
 								                )
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											2015-02-09 09:42:25 +00:00
+								                .*?
 								                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								            # Broaden the search a little bit
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
 								            # Broaden the findall a little bit: JWPlayer JS loader
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            found = filter_video(re.findall(
-												[generic] Expand jwplayer support

											
										
										
											2015-08-05 15:19:52 +00:00
+								                r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								        if not found:
 								            # Flow player
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            found = filter_video(re.findall(r'''(?xs)
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								                flowplayer\("[^"]+",\s*
 								                    \{[^}]+?\}\s*,
-												The opening curly brace `{` is a regex reserved [control character](http://stackoverflow.com/a/400316/1106367), so it needs to be escaped.

											
										
										
											2015-01-30 07:41:40 +00:00
+								                    \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											2014-08-24 03:31:32 +00:00
+								                        ["']?url["']?\s*:\s*["']([^"']+)["']
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            ''', webpage))
-												[generic] Add support for Cinerama player (Fixes #4752)

											
										
										
											2015-01-23 11:00:25 +00:00
+								        if not found:
 								            # Cinerama player
 								            found = re.findall(
 								                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								            # Try to find twitter cards info
-												[extractor/generic] Change twitter:player embeds priority to lowest (Closes #10090)

											
										
										
											2016-07-16 08:59:43 +00:00
+								            # twitter:player:stream should be checked before twitter:player since
 								            # it is expected to contain a raw stream (see
 								            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								            found = filter_video(re.findall(
 								                r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
+								            # We look for Open Graph info:
 								            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
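+								            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player.
 								            # NOTE(review): re.findall() returns a list (never None), so the check
 								            # below is always true and og:video is looked up regardless of the type.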
 								            if m_video_type is not None:
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											2014-10-09 12:26:23 +00:00
+								                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												[generic] Generalize redirect regex

											
										
										
											2015-03-17 18:05:40 +00:00
+								            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
-												[generic] Fix redirect

											
										
										
											2014-05-16 13:32:53 +00:00
+								            found = re.search(
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											2014-02-27 06:21:59 +00:00
+								                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-												[generic] Generalize redirect regex

											
										
										
											2015-03-17 18:05:40 +00:00
+								                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											2014-02-27 06:21:59 +00:00
+								                webpage)
-												[generic] Follow redirects specified by `Refresh` HTTP header

											
										
										
											2015-03-17 17:51:40 +00:00
+								            if not found:
 								                # Look also in Refresh HTTP header
 								                refresh_header = head_response.headers.get('Refresh')
 								                if refresh_header:
-												[extractor/generic] Fix following redirect in Refresh HTTP header on python 2

											
										
										
											2015-09-20 05:16:12 +00:00
+								                    # In python 2 response HTTP headers are bytestrings
 								                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
 								                        refresh_header = refresh_header.decode('iso-8859-1')
-												[generic] Generalize redirect regex

											
										
										
											2015-03-17 18:05:40 +00:00
+								                    found = re.search(REDIRECT_REGEX, refresh_header)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            if found:
-												[generic] Unescape HTML escape sequences in redirect urls (fixes #6311)

											
										
										
											2015-07-22 21:33:49 +00:00
+								                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											2014-02-27 06:21:59 +00:00
+								                self.report_following_redirect(new_url)
 								                return {
 								                    '_type': 'url',
 								                    'url': new_url,
 								                }
-												[extractor/generic] Change twitter:player embeds priority to lowest (Closes #10090)

											
										
										
											2016-07-16 08:59:43 +00:00
 								        if not found:
 								            # twitter:player is an HTTPS URL to an iframe player that may or may
 								            # not be supported by youtube-dl, so it is checked last (see
 								            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
 								            embed_url = self._html_search_meta('twitter:player', webpage, default=None)
 								            if embed_url:
 								                return self.url_result(embed_url)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        if not found:
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											2014-12-30 18:35:35 +00:00
+								            raise UnsupportedError(url)
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        entries = []
-												[genric] Eliminate duplicated video URLs (closes #6562)

											
										
										
											2016-05-22 14:22:27 +00:00
+								        for video_url in orderedSet(found):
-												[generic] Unescape the video URL

Fixes #9279

											
										
										
											2016-04-24 08:23:21 +00:00
+								            video_url = unescapeHTML(video_url)
-												[generic] Unescape URLs from JWPlayer (#7582)

											
										
										
											2015-11-21 06:12:34 +00:00
+								            video_url = video_url.replace('\\/', '/')
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            video_url = compat_urlparse.urljoin(url, video_url)
-												[extractor/generic] Use compat_urllib_parse_unquote

											
										
										
											2015-07-17 17:39:32 +00:00
+								            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            # Sometimes, jwplayer extraction will result in a YouTube URL
 								            if YoutubeIE.suitable(video_url):
 								                entries.append(self.url_result(video_url, 'Youtube'))
 								                continue
-												Move GenericIE into its own file

											
										
										
											2013-06-23 18:31:45 +00:00
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            # Drop the file extension from the URL basename to get the video id
 								            video_id = os.path.splitext(video_id)[0]
-												[youtube] Support jwplayer with YouTube URLs (Closes #2075)

											
										
										
											2014-01-06 00:42:58 +00:00
-												[generic] Refactor

											
										
										
											2015-11-21 08:08:54 +00:00
+								            entry_info_dict = {
 								                'id': video_id,
 								                'uploader': video_uploader,
 								                'title': video_title,
 								                'age_limit': age_limit,
 								            }
-												[extractor/generic] Add support for xspf playlists

											
										
										
											2015-08-09 13:43:42 +00:00
+								            ext = determine_ext(video_url)
 								            if ext == 'smil':
-												[generic] Refactor

											
										
										
											2015-11-21 08:08:54 +00:00
+								                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
-												[extractor/generic] Add support for xspf playlists

											
										
										
											2015-08-09 13:43:42 +00:00
+								            elif ext == 'xspf':
 								                return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
-												[generic] Extract M3U8 formats (closes #7582)

											
										
										
											2015-11-21 08:43:01 +00:00
+								            elif ext == 'm3u8':
 								                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
-												[extractor/generic] Detect DASH manifests in found URLs and extract mpd formats

											
										
										
											2016-02-06 13:42:03 +00:00
+								            elif ext == 'mpd':
 								                entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
-												[extractor/generic] Extract f4m formats from final URLs

											
										
										
											2016-03-12 21:38:20 +00:00
+								            elif ext == 'f4m':
 								                entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
-												[extractor/generic] Skip URLs we came from when delegating ISM extraction

											
										
										
											2016-11-02 16:43:41 +00:00
+								            elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
-												[extractor/generic] Improve ISM extraction

											
										
										
											2016-11-02 16:34:37 +00:00
+								                # Just matching .ism/manifest is not enough to be reliably sure
 								                # whether it's actually an ISM manifest or some other streaming
 								                # manifest since there are various streaming URL formats
 								                # possible (see [1]) as well as some other shenanigans like
 								                # .smil/manifest URLs that actually serve an ISM (see [2]) and
 								                # so on.
 								                # Thus the most reasonable way to solve this is to delegate
 								                # to generic extractor in order to look into the contents of
 								                # the manifest itself.
 								                # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
 								                # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
 								                entry_info_dict = self.url_result(
 								                    smuggle_url(video_url, {'to_generic': True}),
 								                    GenericIE.ie_key())
-												[generic] Extract videos from SMIL manifests (closes #5145 and fixes #5135)

											
										
										
											2015-04-16 09:16:11 +00:00
+								            else:
-												[generic] Refactor

											
										
										
											2015-11-21 08:08:54 +00:00
+								                entry_info_dict['url'] = video_url
-												Remove _sort_formats from _extract_*_formats methods

Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051

											
										
										
											2016-03-27 01:03:08 +00:00
+								            if entry_info_dict.get('formats'):
 								                self._sort_formats(entry_info_dict['formats'])
-												[generic] Refactor

											
										
										
											2015-11-21 08:08:54 +00:00
+								            entries.append(entry_info_dict)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
 								        if len(entries) == 1:
-												[generic] Fix wrong entries index

											
										
										
											2014-05-01 09:28:37 +00:00
+								            return entries[0]
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								        else:
 								            for num, e in enumerate(entries, start=1):
-												[generic] Don't set the 'title' if it's not defined in the entry (closes #5061)

Some of them may be an 'url' result, which in general don't have the 'title' field.

											
										
										
											2015-02-25 16:56:51 +00:00
+								                # 'url' results don't have a title
 								                if e.get('title') is not None:
 								                    e['title'] = '%s (%d)' % (e['title'], num)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											2014-04-30 00:23:51 +00:00
+								            return {
 								                '_type': 'playlist',
 								                'entries': entries,
 								            }