[soundcloud] Make playlist extraction lazy

3 years ago · aa6c25309a
parent d98b006b85
commit aa6c25309a
1 changed file with 21 additions and 40 deletions
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -14,7 +14,6 @@ from ..compat import (
    compat_HTTPError,
    compat_kwargs,
    compat_str,
-    compat_urlparse,
 )
 from ..utils import (
    error_to_compat_str,
@@ -24,6 +23,7 @@ from ..utils import (
    int_or_none,
    KNOWN_EXTENSIONS,
    mimetype2ext,
+    parse_qs,
    str_or_none,
    try_get,
    unified_timestamp,
@@ -49,8 +49,7 @@ class SoundcloudEmbedIE(InfoExtractor):
            webpage)]
    def _real_extract(self, url):
-        query = compat_urlparse.parse_qs(
-            compat_urlparse.urlparse(url).query)
+        query = parse_qs(url)
        api_url = query['url'][0]
        secret_token = query.get('secret_token')
        if secret_token:
@@ -656,64 +655,46 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
 class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': self._entries(base_url, playlist_id),
        }
    def _entries(self, base_url, playlist_id):
        # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
        # https://developers.soundcloud.com/blog/offset-pagination-deprecated
-        COMMON_QUERY = {
+        query = {
            'limit': 200,
            'linked_partitioning': '1',
            'offset': 0,
        }
        query = COMMON_QUERY.copy()
        query['offset'] = 0
        next_href = base_url
        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id,
                'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS)
-            collection = response['collection']
+            def resolve_entry(*candidates):
            if not isinstance(collection, list):
                collection = []
            # Empty collection may be returned, in this case we proceed
            # straight to next_href
            def resolve_entry(candidates):
                for cand in candidates:
                    if not isinstance(cand, dict):
                        continue
                    permalink_url = url_or_none(cand.get('permalink_url'))
-                    if not permalink_url:
+                    if permalink_url:
                        continue
                        return self.url_result(
                            permalink_url,
                            SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                            str_or_none(cand.get('id')), cand.get('title'))
-            for e in collection:
+            for e in response['collection'] or []:
-                entry = resolve_entry((e, e.get('track'), e.get('playlist')))
+                yield resolve_entry(e, e.get('track'), e.get('playlist'))
                if entry:
                    entries.append(entry)
            next_href = response.get('next_href')
-            if not next_href:
+            query.pop('offset', None)
                break
            next_href = response['next_href']
            parsed_next_href = compat_urlparse.urlparse(next_href)
            query = compat_urlparse.parse_qs(parsed_next_href.query)
            query.update(COMMON_QUERY)
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
 class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):