[extractor/common] Allow quoteless content attribute in og regexes (Closes #7115)

pull/8/head
Sergey M․ 9 years ago
parent ef47b2c15f
commit 4180a3d8b7

@@ -645,7 +645,7 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info # Helper functions for extracting OpenGraph info
@staticmethod @staticmethod
def _og_regexes(prop): def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop) property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s' template = r'<meta[^>]+?%s[^>]+?%s'
return [ return [

Loading…
Cancel
Save