Remove afspelen and trailing slashes with one regex

Co-authored-by: dirkf <fieldhouse@gmx.net>
Adhere to code style
1 changed file with 5 additions and 12 deletions
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -11,7 +11,7 @@ from ..utils import ExtractorError
 class NPOIE(InfoExtractor):
    IE_NAME = 'npo'
    IE_DESC = 'npo.nl'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'

    _TESTS = [{
        'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
@@ -44,19 +44,12 @@ class NPOIE(InfoExtractor):
            note='Downloading token')['token']

    def _real_extract(self, url):
-        # You might want to use removesuffix here,
-        # but removesuffix is introduced in Python 3.9
-        # and youtube-dl supports Python 3.2+
-        if url.endswith('/afspelen'):
-            url = url[:-9]
-        elif url.endswith('/afspelen/'):
-            url = url[:-10]
-        url = url.rstrip('/')
+            # Remove /afspelen and/or any trailing `/`s
+            url = re.sub(r'/(?:afspelen)?/*$', '', url)
        slug = url.split('/')[-1]

        program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
-                                               slug,
-                                               query={'slug': slug})
+                                               slug, query={'slug': slug})
        product_id = program_metadata.get('productId')
        images = program_metadata.get('images')
        thumbnail = None
@@ -307,7 +300,7 @@ class VPROIE(NPOIE):
    def _real_extract(self, url):
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)
-        results = re.findall(r'data-media-id="(.+_.+)"\s', page)
+        results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
        formats = []
        for result in results:
            formats.extend(self._extract_formats_by_product_id(result, video_id))
Author	SHA1	Message	Date
Bart Broere	007bbeacd7	Remove afspelen and trailing slashes with one regex Co-authored-by: dirkf <fieldhouse@gmx.net>	2 months ago
Bart Broere	4c90b2f587	Adhere to code style Co-authored-by: dirkf <fieldhouse@gmx.net>	2 months ago
Bart Broere	bc86c5f73b	Make regex more specific and remove redundant .*	2 months ago
Bart Broere	ad64f3751e	Improve regex Co-authored-by: Roy <git@rvsit.nl>	2 months ago