Compare commits

...

4 Commits

Author SHA1 Message Date
Bart Broere 007bbeacd7
Remove afspelen and trailing slashes with one regex
Co-authored-by: dirkf <fieldhouse@gmx.net>
2 months ago
Bart Broere 4c90b2f587
Adhere to code style
Co-authored-by: dirkf <fieldhouse@gmx.net>
2 months ago
Bart Broere bc86c5f73b
Make regex more specific and remove redundant .*
2 months ago
Bart Broere ad64f3751e
Improve regex
Co-authored-by: Roy <git@rvsit.nl>
2 months ago

@@ -11,7 +11,7 @@ from ..utils import ExtractorError
class NPOIE(InfoExtractor):
IE_NAME = 'npo'
IE_DESC = 'npo.nl'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'
_TESTS = [{
'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
@@ -44,19 +44,12 @@ class NPOIE(InfoExtractor):
note='Downloading token')['token']
def _real_extract(self, url):
# You might want to use removesuffix here,
# but removesuffix is introduced in Python 3.9
# and youtube-dl supports Python 3.2+
if url.endswith('/afspelen'):
url = url[:-9]
elif url.endswith('/afspelen/'):
url = url[:-10]
url = url.rstrip('/')
# Remove /afspelen and/or any trailing `/`s
url = re.sub(r'/(?:afspelen)?/*$', '', url)
slug = url.split('/')[-1]
program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
slug,
query={'slug': slug})
slug, query={'slug': slug})
product_id = program_metadata.get('productId')
images = program_metadata.get('images')
thumbnail = None
@@ -307,7 +300,7 @@ class VPROIE(NPOIE):
def _real_extract(self, url):
video_id = url.rstrip('/').split('/')[-1]
page, _ = self._download_webpage_handle(url, video_id)
results = re.findall(r'data-media-id="(.+_.+)"\s', page)
results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
formats = []
for result in results:
formats.extend(self._extract_formats_by_product_id(result, video_id))

Loading…
Cancel
Save