[generic] Suppress warning about doctypes in RSS parser
This commit is contained in:
parent
e3899d0e00
commit
bcf89ce62c
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||
|
||||
import os
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
@ -17,6 +16,7 @@ from ..utils import (
|
|||
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
|
@ -274,7 +274,7 @@ class GenericIE(InfoExtractor):
|
|||
|
||||
# Is it an RSS feed?
|
||||
try:
|
||||
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
except compat_xml_parse_error:
|
||||
|
|
|
@ -22,6 +22,7 @@ import struct
|
|||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
try:
|
||||
|
@ -1267,3 +1268,13 @@ def read_batch_urls(batch_fd):
|
|||
|
||||
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes.

    All positional and keyword arguments are forwarded unchanged to
    ``compat_urllib_parse.urlencode``; the resulting string is then
    encoded with the 'ascii' codec.
    """
    encoded_query = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded_query.encode('ascii')
|
||||
|
||||
|
||||
def parse_xml(s):
    """Parse the text string ``s`` as XML and return the root element.

    The string is encoded to UTF-8 before being handed to ElementTree.
    A tree builder whose ``doctype`` hook does nothing is used, so any
    DOCTYPE declaration in the document is ignored.
    """
    etree = xml.etree.ElementTree

    class _DoctypeIgnoringBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            # Intentionally a no-op: doctypes are ignored.
            return None

    doctype_free_parser = etree.XMLParser(target=_DoctypeIgnoringBuilder())
    encoded = s.encode('utf-8')

    # The custom parser is only passed along on Python 2.7 and newer
    # (presumably older versions of XML() do not accept the keyword).
    if sys.version_info >= (2, 7):
        return etree.XML(encoded, parser=doctype_free_parser)
    return etree.XML(encoded)
|
||||
|
|
Loading…
Reference in New Issue