[br] Allow '/' in URL, allow empty author + broadcastDate fields
* Allow URLs that have a 'subdirectory' before the actual program name, e.g. 'xyz/xyz-episode-1'.
* The author and broadcastDate fields in the XML file may be empty.
* Add a test case covering the two problems above.
This commit is contained in:
parent
98ff9d82d4
commit
c21215b421
|
@ -9,10 +9,11 @@ from ..utils import ExtractorError
|
||||||
|
|
||||||
class BRIE(InfoExtractor):
|
class BRIE(InfoExtractor):
|
||||||
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||||
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||||
_BASE_URL = "http://www.br.de"
|
_BASE_URL = "http://www.br.de"
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||||
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
@ -23,7 +24,20 @@ class BRIE(InfoExtractor):
|
||||||
"uploader": "BR/Birgit Baier",
|
"uploader": "BR/Birgit Baier",
|
||||||
"upload_date": "20140301"
|
"upload_date": "20140301"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
|
||||||
|
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Über den Pass",
|
||||||
|
"description": "Die Eroberung der Alpen: Über den Pass",
|
||||||
|
"uploader": None,
|
||||||
|
"upload_date": None
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -33,16 +47,21 @@ class BRIE(InfoExtractor):
|
||||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||||
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
videos = [{
|
videos = []
|
||||||
|
for xml_video in xml.findall("video"):
|
||||||
|
video = {
|
||||||
"id": xml_video.get("externalId"),
|
"id": xml_video.get("externalId"),
|
||||||
"title": xml_video.find("title").text,
|
"title": xml_video.find("title").text,
|
||||||
"formats": self._extract_formats(xml_video.find("assets")),
|
"formats": self._extract_formats(xml_video.find("assets")),
|
||||||
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||||
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||||
"uploader": xml_video.find("author").text,
|
"webpage_url": xml_video.find("permalink").text
|
||||||
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
}
|
||||||
"webpage_url": xml_video.find("permalink").text,
|
if xml_video.find("author").text:
|
||||||
} for xml_video in xml.findall("video")]
|
video["uploader"] = xml_video.find("author").text
|
||||||
|
if xml_video.find("broadcastDate").text:
|
||||||
|
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
if len(videos) > 1:
|
if len(videos) > 1:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
|
|
Loading…
Reference in New Issue