yt-dlp/yt_dlp/extractor/facebook.py

import json
import re
import urllib.parse

from .common import InfoExtractor
from ..compat import (
    compat_etree_fromstring,
    compat_str,
    compat_urllib_parse_unquote,
)
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
    error_to_compat_str,
    float_or_none,
    format_field,
    get_element_by_id,
    get_first,
    int_or_none,
    join_nonempty,
    js_to_json,
    merge_dicts,
    parse_count,
    parse_qs,
    qualities,
    str_or_none,
    traverse_obj,
    try_get,
    url_or_none,
    urlencode_postdata,
    urljoin,
    variadic,
)


class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                (?:
                    https?://
                        (?:[\w-]+\.)?(?:facebook\.com|facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/
                        (?:[^#]*?\#!/)?
                        (?:
                            (?:
                                permalink\.php|
                                video/video\.php|
                                photo\.php|
                                video\.php|
                                video/embed|
                                story\.php|
                                watch(?:/live)?/?
                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
                            [^/]+/videos/(?:[^/]+/)?|
                            [^/]+/posts/|
                            events/(?:[^/]+/)?|
                            groups/[^/]+/(?:permalink|posts)/|
                            watchparty/
                        )|
                    facebook:
                )
                (?P<id>pfbid[A-Za-z0-9]+|\d+)
                '''
    _EMBED_REGEX = [
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
        # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player
        r'''(?x)<div[^>]+
                class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
                data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
    ]
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'

    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'

    _TESTS = [{
        'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
        'info_dict': {
            'id': '3676516585958356',
            'ext': 'mp4',
            'title': 'dr Adam Przygoda',
            'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
            'uploader': 'RADIO KICKS FM',
            'upload_date': '20230818',
            'timestamp': 1692346159,
            'thumbnail': r're:^https?://.*',
            'uploader_id': '100063551323670',
            'duration': 3132.184,
            'view_count': int,
            'concurrent_view_count': 0,
        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
        'md5': '6a40d33c0eccbb1af76cf0485a052659',
        'info_dict': {
            'id': '637842556329505',
            'ext': 'mp4',
            'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
            'uploader': 'Tennis on Facebook',
            'upload_date': '20140908',
            'timestamp': 1410199200,
        },
        'skip': 'Requires logging in',
    }, {
        # data.video
        'url': 'https://www.facebook.com/video.php?v=274175099429670',
        'info_dict': {
            'id': '274175099429670',
            'ext': 'mp4',
            'title': 'Asif',
            'description': '',
            'uploader': 'Asif Nawab Butt',
            'upload_date': '20140506',
            'timestamp': 1399398998,
            'thumbnail': r're:^https?://.*',
            'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
            'duration': 131.03,
            'concurrent_view_count': int,
        },
    }, {
        'note': 'Video with DASH manifest',
        'url': 'https://www.facebook.com/video.php?v=957955867617029',
        'md5': 'b2c28d528273b323abe5c6ab59f0f030',
        'info_dict': {
            'id': '957955867617029',
            'ext': 'mp4',
            'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
            'uploader': 'Demy de Zeeuw',
            'upload_date': '20160110',
            'timestamp': 1452431627,
        },
        'skip': 'Requires logging in',
    }, {
        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
        'info_dict': {
            'id': '544765982287235',
            'ext': 'mp4',
            'title': '"What are you doing running in the snow?"',
            'uploader': 'FailArmy',
        },
        'skip': 'Video gone',
    }, {
        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
        'info_dict': {
            'id': '1035862816472149',
            'ext': 'mp4',
            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
            'uploader': 'S. Saint',
        },
        'skip': 'Video gone',
    }, {
        'note': 'swf params escaped',
        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
        'md5': '97ba073838964d12c70566e0085c2b91',
        'info_dict': {
            'id': '10153664894881749',
            'ext': 'mp4',
            'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
            'thumbnail': r're:^https?://.*',
            'timestamp': 1456259628,
            'upload_date': '20160223',
            'uploader': 'Barack Obama',
        },
        'skip': 'Gif on giphy.com gone',
    }, {
        # have 1080P, but only up to 720p in swf params
        # data.video.story.attachments[].media
        'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
        'md5': 'ca63897a90c9452efee5f8c40d080e25',
        'info_dict': {
            'id': '10155529876156509',
            'ext': 'mp4',
            'title': 'Holocaust survivor becomes US citizen',
            'description': 'She survived the holocaust — and years later, she’s getting her citizenship so she can vote for Hillary Clinton http://cnn.it/2eERh5f',
            'timestamp': 1477818095,
            'upload_date': '20161030',
            'uploader': 'CNN',
            'thumbnail': r're:^https?://.*',
            'view_count': int,
            'uploader_id': '100059479812265',
            'concurrent_view_count': int,
            'duration': 44.478,
        },
    }, {
        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
        'info_dict': {
            'id': '1417995061575415',
            'ext': 'mp4',
            'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
            'description': 'Довгоочікуване відео',
            'timestamp': 1486648217,
            'upload_date': '20170209',
            'uploader': 'Yaroslav Korpan',
            'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
            'concurrent_view_count': int,
            'thumbnail': r're:^https?://.*',
            'view_count': int,
            'duration': 11736.446,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # FIXME
        'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
        'info_dict': {
            'id': '1072691702860471',
            'ext': 'mp4',
            'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
            'timestamp': 1477305000,
            'upload_date': '20161024',
            'uploader': 'La Guía Del Varón',
            'thumbnail': r're:^https?://.*',
        },
        'skip': 'Requires logging in',
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
        'info_dict': {
            'id': '202882990186699',
            'ext': 'mp4',
            'title': 'birb (O v O") | Hello? Yes your uber ride is here',
            'description': 'Hello? Yes your uber ride is here * Jukin Media Verified * Find this video and others like it by visiting...',
            'timestamp': 1486035513,
            'upload_date': '20170202',
            'uploader': 'Elisabeth Ahtn',
            'uploader_id': '100013949973717',
        },
        'skip': 'Requires logging in',
    }, {
        # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
        'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
        'info_dict': {
            'id': '1569199726448814',
            'ext': 'mp4',
            'title': 'Pence MUST GO!',
            'description': 'Vickie Gentry shared a memory.',
            'timestamp': 1511548260,
            'upload_date': '20171124',
            'uploader': 'Vickie Gentry',
            'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
            'thumbnail': r're:^https?://.*',
            'duration': 148.435,
        },
    }, {
        # data.node.comet_sections.content.story.attachments[].styles.attachment.media
        'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
        'info_dict': {
            'id': '6968553779868435',
            'ext': 'mp4',
            'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
            'uploader': 'ATTN:',
            'upload_date': '20231207',
            'title': 'ATTN:',
            'duration': 132.675,
            'uploader_id': '100064451419378',
            'view_count': int,
            'thumbnail': r're:^https?://.*',
            'timestamp': 1701975646,
        },
    }, {
        # data.node.comet_sections.content.story.attachments[].styles.attachment.media
        'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
        'info_dict': {
            'id': '270103405756416',
            'ext': 'mp4',
            'title': 'Lela Evans',
            'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
            'thumbnail': r're:^https?://.*',
            'uploader': 'Lela Evans',
            'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
            'upload_date': '20231228',
            'timestamp': 1703804085,
            'duration': 394.347,
            'view_count': int,
        },
    }, {
        'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
        'only_matching': True,
    }, {
        # data.mediaset.currMedia.edges
        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
        'only_matching': True,
    }, {
        # data.video.story.attachments[].media
        'url': 'facebook:544765982287235',
        'only_matching': True,
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
        'only_matching': True,
    }, {
        # data.video.creation_story.attachments[].media
        'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
        'only_matching': True,
    }, {
        # data.video
        'url': 'https://www.facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd.onion/video.php?v=274175099429670',
        'only_matching': True,
    }, {
        # no title
        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
        'only_matching': True,
    }, {
        # data.video
        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
        'info_dict': {
            'id': '359649331226507',
            'ext': 'mp4',
            'title': 'Fnatic vs. EG - Group A - Opening Match - ESL One Birmingham Day 1',
            'description': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
            'timestamp': 1527084179,
            'upload_date': '20180523',
            'uploader': 'ESL One Dota 2',
            'uploader_id': '100066514874195',
            'duration': 4524.212,
            'view_count': int,
            'thumbnail': r're:^https?://.*',
            'concurrent_view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
        'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
        'info_dict': {
            'id': '106560053808006',
            'ext': 'mp4',
            'title': 'Josef',
            'thumbnail': r're:^https?://.*',
            'concurrent_view_count': int,
            'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
            'timestamp': 1549275572,
            'duration': 3.413,
            'uploader': 'Josef Novak',
            'description': '',
            'upload_date': '20190204',
        },
    }, {
        # data.video.story.attachments[].media
        'url': 'https://www.facebook.com/watch/?v=647537299265662',
        'only_matching': True,
    }, {
        # FIXME: https://github.com/yt-dlp/yt-dlp/issues/542
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
        'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
        'info_dict': {
            'id': '10157667649866271',
        },
        'playlist_count': 3,
        'skip': 'Requires logging in',
    }, {
        # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
        'info_dict': {
            'id': '117576630041613',
            'ext': 'mp4',
            # TODO: title can be extracted from video page
            'title': 'Facebook video #117576630041613',
            'uploader_id': '189393014416438',
            'upload_date': '20201123',
            'timestamp': 1606162592,
        },
        'skip': 'Requires logging in',
    }, {
        # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
        'info_dict': {
            'id': '211567722618337',
            'ext': 'mp4',
            'title': 'Facebook video #211567722618337',
            'uploader_id': '127875227654254',
            'upload_date': '20161122',
            'timestamp': 1479793574,
        },
        'skip': 'No video',
    }, {
        # data.video.creation_story.attachments[].media
        'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/watchparty/211641140192478',
        'info_dict': {
            'id': '211641140192478',
        },
        'playlist_count': 1,
        'skip': 'Requires logging in',
    }, {
        # data.event.cover_media_renderer.cover_video
        'url': 'https://m.facebook.com/events/1509582499515440',
        'info_dict': {
            'id': '637246984455045',
            'ext': 'mp4',
            'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
            'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
            'thumbnail': r're:^https?://.*',
            'uploader': 'Comitato Liberi Pensatori',
            'uploader_id': '100065709540881',
        },
    }]
    _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
    _api_config = {
        'graphURI': '/api/graphql/'
    }

    def _perform_login(self, username, password):
        login_page_req = Request(self._LOGIN_URL)
        self._set_cookie('facebook.com', 'locale', 'en_US')
        login_page = self._download_webpage(login_page_req, None,
                                            note='Downloading login page',
                                            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
            login_page, 'lsd')
        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

        login_form = {
            'email': username,
            'pass': password,
            'lsd': lsd,
            'lgnrnd': lgnrnd,
            'next': 'http://facebook.com/home.php',
            'default_persistent': '0',
            'legacy_return': '1',
            'timezone': '-60',
            'trynum': '1',
        }
        request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
        request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
        try:
            login_results = self._download_webpage(request, None,
                                                   note='Logging in', errnote='unable to fetch login page')
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                error = self._html_search_regex(
                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
                    login_results, 'login error', default=None, group='error')
                if error:
                    raise ExtractorError('Unable to login: %s' % error, expected=True)
                self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
                return

            fb_dtsg = self._search_regex(
                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
            h = self._search_regex(
                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)

            if not fb_dtsg or not h:
                return

            check_form = {
                'fb_dtsg': fb_dtsg,
                'h': h,
                'name_action_selected': 'dont_save',
            }
            check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
            check_response = self._download_webpage(check_req, None,
                                                    note='Confirming login')
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
        except network_exceptions as err:
            self.report_warning('unable to log in: %s' % error_to_compat_str(err))
            return

    def _extract_from_url(self, url, video_id):
        webpage = self._download_webpage(
            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)

        def extract_metadata(webpage):
            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
                r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
            post = traverse_obj(post_data, (
                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))
            description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
            page_title = title or self._html_search_regex((
                r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
                self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>'
            ), webpage, 'title', default=None, group='content')
            description = description or self._html_search_meta(
                ['description', 'og:description', 'twitter:description'],
                webpage, 'description', default=None)
            uploader_data = (
                get_first(media, ('owner', {dict}))
                or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
                or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
                or get_first(post, ('node', 'actors', ..., {dict}))
                or get_first(post, ('event', 'event_creator', {dict})) or {})
            uploader = uploader_data.get('name') or (
                clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
                or self._search_regex(
                    (r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
            timestamp = int_or_none(self._search_regex(
                r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
                'timestamp', default=None))
            thumbnail = self._html_search_meta(
                ['og:image', 'twitter:image'], webpage, 'thumbnail', default=None)
            # some webpages contain unretrievable thumbnail urls
            # like https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10155168902769113&get_thumbnail=1
            # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
            if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
                thumbnail = None
            info_dict = {
                'description': description,
                'uploader': uploader,
                'uploader_id': uploader_data.get('id'),
                'timestamp': timestamp,
                'thumbnail': thumbnail,
                'view_count': parse_count(self._search_regex(
                    (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
                    webpage, 'view count', default=None)),
                'concurrent_view_count': get_first(post, (
                    ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
            }

            info_json_ld = self._search_json_ld(webpage, video_id, default={})
            info_json_ld['title'] = (re.sub(r'\s*\|\s*Facebook$', '', title or info_json_ld.get('title') or page_title or '')
                                     or (description or '').replace('\n', ' ') or f'Facebook video #{video_id}')
            return merge_dicts(info_json_ld, info_dict)

        video_data = None

        def extract_video_data(instances):
            video_data = []
            for item in instances:
                if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
                    video_item = item[2][0]
                    if video_item.get('video_id'):
                        video_data.append(video_item['videoData'])
            return video_data

        server_js_data = self._parse_json(self._search_regex(
            [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
            webpage, 'server js data', default='{}'), video_id, fatal=False)

        if server_js_data:
            video_data = extract_video_data(server_js_data.get('instances', []))

        def extract_from_jsmods_instances(js_data):
            if js_data:
                return extract_video_data(try_get(
                    js_data, lambda x: x['jsmods']['instances'], list) or [])

        def extract_dash_manifest(video, formats):
            dash_manifest = video.get('dash_manifest')
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
                    mpd_url=video.get('dash_manifest_url')))

        def process_formats(info):
            # Downloads with browser's User-Agent are rate limited. Working around
            # with non-browser User-Agent.
            for f in info['formats']:
                # Downloads with browser's User-Agent are rate limited. Working around
                # with non-browser User-Agent.
                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
                # Formats larger than ~500MB will return error 403 unless chunk size is regulated
                f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20

        def extract_relay_data(_filter):
            return self._parse_json(self._search_regex(
                r'data-sjs>({.*?%s.*?})</script>' % _filter,
                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}

        def extract_relay_prefetched_data(_filter):
            return traverse_obj(extract_relay_data(_filter), (
                'require', (None, (..., ..., ..., '__bbox', 'require')),
                lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
                ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}

        if not video_data:
            server_js_data = self._parse_json(self._search_regex([
                r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
                r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
            ], webpage, 'js data', default='{}'), video_id, js_to_json, False)
            video_data = extract_from_jsmods_instances(server_js_data)

        if not video_data:
            data = extract_relay_prefetched_data(
                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
            if data:
                entries = []

                def parse_graphql_video(video):
                    v_id = video.get('videoId') or video.get('id') or video_id
                    reel_info = traverse_obj(
                        video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
                    if reel_info:
                        video = video['creation_story']
                        video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
                        video.update(reel_info)
                    formats = []
                    q = qualities(['sd', 'hd'])
                    for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
                                           ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
                                           ('browser_native_sd_url', 'sd')):
                        playable_url = video.get(key)
                        if not playable_url:
                            continue
                        if determine_ext(playable_url) == 'mpd':
                            formats.extend(self._extract_mpd_formats(playable_url, video_id))
                        else:
                            formats.append({
                                'format_id': format_id,
                                # sd, hd formats w/o resolution info should be deprioritized below DASH
                                'quality': q(format_id) - 3,
                                'url': playable_url,
                            })
                    extract_dash_manifest(video, formats)

                    automatic_captions, subtitles = {}, {}
                    is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
                    for caption in traverse_obj(video, (
                        'video_available_captions_locales',
                        {lambda x: sorted(x, key=lambda c: c['locale'])},
                        lambda _, v: url_or_none(v['captions_url'])
                    )):
                        lang = caption.get('localized_language') or 'und'
                        subs = {
                            'url': caption['captions_url'],
                            'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
                        }
                        if caption.get('localized_creation_method') or is_broadcast:
                            automatic_captions.setdefault(caption['locale'], []).append(subs)
                        else:
                            subtitles.setdefault(caption['locale'], []).append(subs)
                    captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
                    if captions_url and not automatic_captions and not subtitles:
                        locale = self._html_search_meta(
                            ['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
                        (automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]

                    info = {
                        'id': v_id,
                        'formats': formats,
                        'thumbnail': traverse_obj(
                            video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
                        'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
                        'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
                        'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
                                     or float_or_none(video.get('length_in_second'))),
                        'automatic_captions': automatic_captions,
                        'subtitles': subtitles,
                    }
                    process_formats(info)
                    description = try_get(video, lambda x: x['savable_description']['text'])
                    title = video.get('name')
                    if title:
                        info.update({
                            'title': title,
                            'description': description,
                        })
                    else:
                        info['title'] = description or 'Facebook video #%s' % v_id
                    entries.append(info)

                def parse_attachment(attachment, key='media'):
                    media = attachment.get(key) or {}
                    if media.get('__typename') == 'Video':
                        return parse_graphql_video(media)

                nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
                attachments = traverse_obj(nodes, (
                    ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
                    ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
                    'attachment', {dict}))
                for attachment in attachments:
                    ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
                                      ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
                    for n in ns:
                        parse_attachment(n)
                    parse_attachment(attachment)

                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
                for edge in edges:
                    parse_attachment(edge, key='node')

                video = traverse_obj(data, (
                    'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
                if video:
                    attachments = try_get(video, [
                        lambda x: x['story']['attachments'],
                        lambda x: x['creation_story']['attachments']
                    ], list) or []
                    for attachment in attachments:
                        parse_attachment(attachment)
                    if not entries:
                        parse_graphql_video(video)

                if len(entries) > 1:
                    return self.playlist_result(entries, video_id)

                video_info = entries[0] if entries else {'id': video_id}
                webpage_info = extract_metadata(webpage)
                # honor precise duration in video info
                if video_info.get('duration'):
                    webpage_info['duration'] = video_info['duration']
                # preserve preferred_thumbnail in video info
                if video_info.get('thumbnail'):
                    webpage_info['thumbnail'] = video_info['thumbnail']
                return merge_dicts(webpage_info, video_info)

        if not video_data:
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                    expected=True)
            elif any(p in webpage for p in (
                    '>You must log in to continue',
                    'id="login_form"',
                    'id="loginbutton"')):
                self.raise_login_required()

        if not video_data and '/watchparty/' in url:
            post_data = {
                'doc_id': 3731964053542869,
                'variables': json.dumps({
                    'livingRoomID': video_id,
                }),
            }

            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
            if prefetched_data:
                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
                if lsd:
                    post_data[lsd['name']] = lsd['value']

            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
            for define in (relay_data.get('define') or []):
                if define[0] == 'RelayAPIConfigDefaults':
                    self._api_config = define[2]

            living_room = self._download_json(
                urljoin(url, self._api_config['graphURI']), video_id,
                data=urlencode_postdata(post_data))['data']['living_room']

            entries = []
            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
                video = try_get(edge, lambda x: x['node']['video']) or {}
                v_id = video.get('id')
                if not v_id:
                    continue
                v_id = compat_str(v_id)
                entries.append(self.url_result(
                    self._VIDEO_PAGE_TEMPLATE % v_id,
                    self.ie_key(), v_id, video.get('name')))

            return self.playlist_result(entries, video_id)

        if not video_data:
            # Video info not in first request, do a secondary request using
            # tahoe player specific URL
            tahoe_data = self._download_webpage(
                self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
                data=urlencode_postdata({
                    '__a': 1,
                    '__pc': self._search_regex(
                        r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
                        'pkg cohort', default='PHASED:DEFAULT'),
                    '__rev': self._search_regex(
                        r'client_revision["\']\s*:\s*(\d+),', webpage,
                        'client revision', default='3944515'),
                    'fb_dtsg': self._search_regex(
                        r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
                        webpage, 'dtsg token', default=''),
                }),
                headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                })
            tahoe_js_data = self._parse_json(
                self._search_regex(
                    r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
                    'tahoe js data', default='{}'),
                video_id, fatal=False)
            video_data = extract_from_jsmods_instances(tahoe_js_data)

        if not video_data:
            raise ExtractorError('Cannot parse data')

        if len(video_data) > 1:
            entries = []
            for v in video_data:
                video_url = v[0].get('video_url')
                if not video_url:
                    continue
                entries.append(self.url_result(urljoin(
                    url, video_url), self.ie_key(), v[0].get('video_id')))
            return self.playlist_result(entries, video_id)
        video_data = video_data[0]

        formats = []
        subtitles = {}
        for f in video_data:
            format_id = f['stream_type']
            if f and isinstance(f, dict):
                f = [f]
            if not f or not isinstance(f, list):
                continue
            for quality in ('sd', 'hd'):
                for src_type in ('src', 'src_no_ratelimit'):
                    src = f[0].get('%s_%s' % (quality, src_type))
                    if src:
                        # sd, hd formats w/o resolution info should be deprioritized below DASH
                        # TODO: investigate if progressive or src formats still exist
                        preference = -10 if format_id == 'progressive' else -3
                        if quality == 'hd':
                            preference += 1
                        formats.append({
                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                            'url': src,
                            'quality': preference,
                            'height': 720 if quality == 'hd' else None
                        })
            extract_dash_manifest(f[0], formats)
            subtitles_src = f[0].get('subtitles_src')
            if subtitles_src:
                subtitles.setdefault('en', []).append({'url': subtitles_src})

        info_dict = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        process_formats(info_dict)
        info_dict.update(extract_metadata(webpage))

        return info_dict

    def _real_extract(self, url):
        video_id = self._match_id(url)

        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
        return self._extract_from_url(real_url, video_id)


class FacebookPluginsVideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'

    _TESTS = [{
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
        'md5': '5954e92cdfe51fe5782ae9bda7058a07',
        'info_dict': {
            'id': '10154383743583686',
            'ext': 'mp4',
            # TODO: Fix title, uploader
            'title': 'What to do during the haze?',
            'uploader': 'Gov.sg',
            'upload_date': '20160826',
            'timestamp': 1472184808,
        },
        'add_ie': [FacebookIE.ie_key()],
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        return self.url_result(
            compat_urllib_parse_unquote(self._match_id(url)),
            FacebookIE.ie_key())


class FacebookRedirectURLIE(InfoExtractor):
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/flx/warn[/?]'
    _TESTS = [{
        'url': 'https://www.facebook.com/flx/warn/?h=TAQHsoToz&u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&s=1',
        'info_dict': {
            'id': 'pO8h3EaFRdo',
            'ext': 'mp4',
            'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
            'description': 'md5:2d713ccbb45b686a1888397b2c77ca6b',
            'channel_id': 'UCGBpxWJr9FNOcFYA5GkKrMg',
            'playable_in_embed': True,
            'categories': ['Music'],
            'channel': 'Boiler Room',
            'uploader_id': 'brtvofficial',
            'uploader': 'Boiler Room',
            'tags': 'count:11',
            'duration': 3332,
            'live_status': 'not_live',
            'thumbnail': 'https://i.ytimg.com/vi/pO8h3EaFRdo/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg',
            'availability': 'public',
            'uploader_url': 'http://www.youtube.com/user/brtvofficial',
            'upload_date': '20150917',
            'age_limit': 0,
            'view_count': int,
            'like_count': int,
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        redirect_url = url_or_none(parse_qs(url).get('u', [None])[-1])
        if not redirect_url:
            raise ExtractorError('Invalid facebook redirect URL', expected=True)
        return self.url_result(redirect_url)


class FacebookReelIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
    IE_NAME = 'facebook:reel'

    _TESTS = [{
        'url': 'https://www.facebook.com/reel/1195289147628387',
        'md5': 'f13dd37f2633595982db5ed8765474d3',
        'info_dict': {
            'id': '1195289147628387',
            'ext': 'mp4',
            'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
            'description': 'md5:22f03309b216ac84720183961441d8db',
            'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
            'uploader_id': '100040874179269',
            'duration': 9.579,
            'timestamp': 1637502609,
            'upload_date': '20211121',
            'thumbnail': r're:^https?://.*',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(
            f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)


class FacebookAdsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        }
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        formats = []
        for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
        )):
            formats.append({
                'format_id': self._FORMATS_MAP[format_key][0],
                'format_note': self._FORMATS_MAP[format_key][1],
                'url': format_url,
                'ext': 'mp4',
                'quality': qualities(tuple(self._FORMATS_MAP))(format_key),
            })
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        title = data.get('title')
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        entries = []
        for idx, entry in enumerate(traverse_obj(
            data, (('videos', 'cards'), lambda _, v: any([url_or_none(v[f]) for f in self._FORMATS_MAP]))), 1
        ):
            entries.append({
                'id': f'{video_id}_{idx}',
                'title': entry.get('title') or title,
                'description': entry.get('link_description') or info_dict.get('description'),
                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
                'formats': self._extract_formats(entry),
            })

        if len(entries) == 1:
            info_dict.update(entries[0])

        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })

        info_dict['id'] = video_id

        return info_dict
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								import json
-												Move Facebook into its own file

											
										
										
											12 years ago
+								import re
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2 years ago
+								import urllib.parse
-												Move Facebook into its own file

											
										
										
											12 years ago
 								from .common import InfoExtractor
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								from ..compat import (
-												[facebook] Add support for DASH manifests

											
										
										
											9 years ago
+								    compat_etree_fromstring,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    compat_str,
-												[facebook] Use compat_urllib_parse_unquote

											
										
										
											9 years ago
+								    compat_urllib_parse_unquote,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								)
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											1 year ago
+								from ..networking import Request
 								from ..networking.exceptions import network_exceptions
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								from ..utils import (
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2 years ago
+								    ExtractorError,
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								    clean_html,
-												[facebook] Parse dash manifests

											
										
										
											3 years ago
+								    determine_ext,
-												Rename error_to_str to error_to_compat_str

											
										
										
											9 years ago
+								    error_to_compat_str,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    float_or_none,
-												[ie/facebook] Improve subtitles extraction (#8296)

Authored by: kclauhk
											
										
										
											11 months ago
+								    format_field,
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								    get_element_by_id,
-												[utils] Add `get_first`

											
										
										
											3 years ago
+								    get_first,
-												[facebook] Extract timestamp (Closes #10508)

											
										
										
											8 years ago
+								    int_or_none,
-												[ie/facebook:ads] Add extractor (#8870)

Closes #8083
Authored by: kclauhk
											
										
										
											9 months ago
+								    join_nonempty,
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								    js_to_json,
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								    merge_dicts,
-												[facebook] Extract view count and update tests (closes #16942)

											
										
										
											6 years ago
+								    parse_count,
-												[Facebook] Handle redirect URLs
Closes #1035

											
										
										
											3 years ago
+								    parse_qs,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    qualities,
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								    str_or_none,
-												[facebook] Improve title and uploader extraction

Closes #1943, closes #795

											
										
										
											3 years ago
+								    traverse_obj,
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								    try_get,
-												[Facebook] Handle redirect URLs
Closes #1035

											
										
										
											3 years ago
+								    url_or_none,
-												[facebook] Make thumbnail and duration optional

Fixes #4425.
Looks like both properties aren't given to us anymore. For now, just fall back to not returning them.

											
										
										
											10 years ago
+								    urlencode_postdata,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    urljoin,
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								    variadic,
-												Move Facebook into its own file

											
										
										
											12 years ago
+								)
 								class FacebookIE(InfoExtractor):
-												[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

											
										
										
											11 years ago
+								    _VALID_URL = r'''(?x)
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								                (?:
 								                    https?://
-												[facebook] Update onion URL (#788)

Authored by: Derkades
											
										
										
											3 years ago
+								                        (?:[\w-]+\.)?(?:facebook\.com|facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								                        (?:[^#]*?\#!/)?
 								                        (?:
 								                            (?:
-												[ie/facebook] Support permalink URLs (#9061)

Authored by: kclauhk
											
										
										
											9 months ago
+								                                permalink\.php|
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								                                video/video\.php|
 								                                photo\.php|
 								                                video\.php|
-												[facebook] Support mobile URLs (closes #8638)

											
										
										
											9 years ago
+								                                video/embed|
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                                story\.php|
 								                                watch(?:/live)?/?
-												[facebook] Support mobile URLs (closes #8638)

											
										
										
											9 years ago
+								                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
+								                            [^/]+/videos/(?:[^/]+/)?|
-												[facebook] Support videos in groups

Viewing/Downloading videos in groups requires logging in, even for
those in public groups.

Fixes #6951.

											
										
										
											9 years ago
+								                            [^/]+/posts/|
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								                            events/(?:[^/]+/)?|
-												[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
											
										
										
											10 months ago
+								                            groups/[^/]+/(?:permalink|posts)/|
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                            watchparty/
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								                        )|
 								                    facebook:
 								                )
-												[ie/Facebook] Add new ID format (#3824)

Closes #3496
Authored by: Wikidepia, kclauhk

Co-authored-by: kclauhk <78251477+kclauhk@users.noreply.github.com>
											
										
										
											10 months ago
+								                (?P<id>pfbid[A-Za-z0-9]+|\d+)
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								                '''
-												[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated

											
										
										
											2 years ago
+								    _EMBED_REGEX = [
 								        r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
 								        # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player
 								        r'''(?x)<div[^>]+
 								                class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
 								                data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
 								    ]
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
+								    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
 								    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
-												Move Facebook into its own file

											
										
										
											12 years ago
+								    _NETRC_MACHINE = 'facebook'
-												[facebook] Modernize

											
										
										
											11 years ago
+								    IE_NAME = 'facebook'
-												[facebook] Add support for DASH manifests

											
										
										
											9 years ago
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
+								    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
-												[facebook] fix tahoe request(closes #17171)

											
										
										
											6 years ago
+								    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
-												[facebook] Match video.php URLs

											
										
										
											10 years ago
+								    _TESTS = [{
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								        'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
 								        'info_dict': {
 								            'id': '3676516585958356',
 								            'ext': 'mp4',
 								            'title': 'dr Adam Przygoda',
 								            'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
 								            'uploader': 'RADIO KICKS FM',
 								            'upload_date': '20230818',
 								            'timestamp': 1692346159,
 								            'thumbnail': r're:^https?://.*',
 								            'uploader_id': '100063551323670',
 								            'duration': 3132.184,
 								            'view_count': int,
 								            'concurrent_view_count': 0,
 								        },
 								    }, {
-												[facebook] Fix and caption if title is empty

											
										
										
											10 years ago
+								        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
 								        'md5': '6a40d33c0eccbb1af76cf0485a052659',
-												[facebook] Modernize

											
										
										
											11 years ago
+								        'info_dict': {
-												[facebook] Fix and caption if title is empty

											
										
										
											10 years ago
+								            'id': '637842556329505',
-												[facebook] Modernize

											
										
										
											11 years ago
+								            'ext': 'mp4',
-												[facebook] Fix test case

											
										
										
											10 years ago
+								            'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
-												[facebook] add uploader value to the tests

											
										
										
											9 years ago
+								            'uploader': 'Tennis on Facebook',
-												[facebook] Extract timestamp (Closes #10508)

											
										
										
											8 years ago
+								            'upload_date': '20140908',
 								            'timestamp': 1410199200,
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								        },
 								        'skip': 'Requires logging in',
-												[facebook] Fix support for untitled videos (Fixes #3757)

											
										
										
											10 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video
-												[facebook] Fix support for untitled videos (Fixes #3757)

											
										
										
											10 years ago
+								        'url': 'https://www.facebook.com/video.php?v=274175099429670',
 								        'info_dict': {
 								            'id': '274175099429670',
 								            'ext': 'mp4',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'title': 'Asif',
 								            'description': '',
-												[facebook] add uploader value to the tests

											
										
										
											9 years ago
+								            'uploader': 'Asif Nawab Butt',
-												[facebook] Extract timestamp (Closes #10508)

											
										
										
											8 years ago
+								            'upload_date': '20140506',
 								            'timestamp': 1399398998,
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								            'thumbnail': r're:^https?://.*',
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								            'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'duration': 131.03,
 								            'concurrent_view_count': int,
-												[facebook] Move the title extraction warning below (fixes #5820)

											
										
										
											10 years ago
+								        },
-												[facebook] Add support for DASH manifests

											
										
										
											9 years ago
+								    }, {
 								        'note': 'Video with DASH manifest',
 								        'url': 'https://www.facebook.com/video.php?v=957955867617029',
-												[facebook] Extract timestamp (Closes #10508)

											
										
										
											8 years ago
+								        'md5': 'b2c28d528273b323abe5c6ab59f0f030',
-												[facebook] Add support for DASH manifests

											
										
										
											9 years ago
+								        'info_dict': {
 								            'id': '957955867617029',
 								            'ext': 'mp4',
 								            'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
 								            'uploader': 'Demy de Zeeuw',
-												[facebook] Extract timestamp (Closes #10508)

											
										
										
											8 years ago
+								            'upload_date': '20160110',
 								            'timestamp': 1452431627,
-												[facebook] Add support for DASH manifests

											
										
										
											9 years ago
+								        },
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								        'skip': 'Requires logging in',
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
+								    }, {
 								        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
 								        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
 								        'info_dict': {
 								            'id': '544765982287235',
 								            'ext': 'mp4',
 								            'title': '"What are you doing running in the snow?"',
 								            'uploader': 'FailArmy',
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        },
 								        'skip': 'Video gone',
-												[facebook] Fix for m.facebook.com URLs

											
										
										
											9 years ago
+								    }, {
 								        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
 								        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
 								        'info_dict': {
 								            'id': '1035862816472149',
 								            'ext': 'mp4',
 								            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
 								            'uploader': 'S. Saint',
 								        },
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        'skip': 'Video gone',
-												[facebook] Handle escaped swf params

Fixes #8713

											
										
										
											9 years ago
+								    }, {
 								        'note': 'swf params escaped',
 								        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
 								        'md5': '97ba073838964d12c70566e0085c2b91',
 								        'info_dict': {
 								            'id': '10153664894881749',
 								            'ext': 'mp4',
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								            'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
 								            'thumbnail': r're:^https?://.*',
 								            'timestamp': 1456259628,
 								            'upload_date': '20160223',
 								            'uploader': 'Barack Obama',
-												[facebook] Handle escaped swf params

Fixes #8713

											
										
										
											9 years ago
+								        },
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        'skip': 'Gif on giphy.com gone',
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								    }, {
 								        # have 1080P, but only up to 720p in swf params
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video.story.attachments[].media
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								        'md5': 'ca63897a90c9452efee5f8c40d080e25',
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        'info_dict': {
 								            'id': '10155529876156509',
 								            'ext': 'mp4',
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'title': 'Holocaust survivor becomes US citizen',
 								            'description': 'She survived the holocaust — and years later, she’s getting her citizenship so she can vote for Hillary Clinton http://cnn.it/2eERh5f',
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								            'timestamp': 1477818095,
 								            'upload_date': '20161030',
 								            'uploader': 'CNN',
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								            'thumbnail': r're:^https?://.*',
-												[facebook] Extract view count and update tests (closes #16942)

											
										
										
											6 years ago
+								            'view_count': int,
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'uploader_id': '100059479812265',
 								            'concurrent_view_count': int,
 								            'duration': 44.478,
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        },
-												[facebook] Improve JS data regex (closes #12042)

											
										
										
											8 years ago
+								    }, {
 								        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
-												[facebook] Improve JS data regex (closes #12042)

											
										
										
											8 years ago
+								        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
 								        'info_dict': {
 								            'id': '1417995061575415',
 								            'ext': 'mp4',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'description': 'Довгоочікуване відео',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'timestamp': 1486648217,
-												[facebook] Improve JS data regex (closes #12042)

											
										
										
											8 years ago
+								            'upload_date': '20170209',
 								            'uploader': 'Yaroslav Korpan',
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								            'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'concurrent_view_count': int,
 								            'thumbnail': r're:^https?://.*',
 								            'view_count': int,
 								            'duration': 11736.446,
-												[facebook] Improve JS data regex (closes #12042)

											
										
										
											8 years ago
+								        },
 								        'params': {
 								            'skip_download': True,
 								        },
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								    }, {
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        # FIXME
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								        'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
 								        'info_dict': {
 								            'id': '1072691702860471',
 								            'ext': 'mp4',
 								            'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
 								            'timestamp': 1477305000,
 								            'upload_date': '20161024',
 								            'uploader': 'La Guía Del Varón',
-												[facebook] Support thumbnails (closes #14416)

											
										
										
											7 years ago
+								            'thumbnail': r're:^https?://.*',
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								        },
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								        'skip': 'Requires logging in',
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								        'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
 								        'info_dict': {
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'id': '202882990186699',
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								            'ext': 'mp4',
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            'title': 'birb (O v O") | Hello? Yes your uber ride is here',
 								            'description': 'Hello? Yes your uber ride is here * Jukin Media Verified * Find this video and others like it by visiting...',
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'timestamp': 1486035513,
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								            'upload_date': '20170202',
 								            'uploader': 'Elisabeth Ahtn',
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'uploader_id': '100013949973717',
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								        },
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								        'skip': 'Requires logging in',
-												[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
											
										
										
											10 months ago
+								    }, {
 								        # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
 								        'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
 								        'info_dict': {
 								            'id': '1569199726448814',
 								            'ext': 'mp4',
 								            'title': 'Pence MUST GO!',
 								            'description': 'Vickie Gentry shared a memory.',
 								            'timestamp': 1511548260,
 								            'upload_date': '20171124',
 								            'uploader': 'Vickie Gentry',
 								            'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
 								            'thumbnail': r're:^https?://.*',
 								            'duration': 148.435,
 								        },
-												[ie/Facebook] Add new ID format (#3824)

Closes #3496
Authored by: Wikidepia, kclauhk

Co-authored-by: kclauhk <78251477+kclauhk@users.noreply.github.com>
											
										
										
											10 months ago
+								    }, {
-												[ie/facebook] Support permalink URLs (#9061)

Authored by: kclauhk
											
										
										
											9 months ago
+								        # data.node.comet_sections.content.story.attachments[].styles.attachment.media
-												[ie/Facebook] Add new ID format (#3824)

Closes #3496
Authored by: Wikidepia, kclauhk

Co-authored-by: kclauhk <78251477+kclauhk@users.noreply.github.com>
											
										
										
											10 months ago
+								        'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
 								        'info_dict': {
 								            'id': '6968553779868435',
 								            'ext': 'mp4',
 								            'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
 								            'uploader': 'ATTN:',
 								            'upload_date': '20231207',
 								            'title': 'ATTN:',
 								            'duration': 132.675,
 								            'uploader_id': '100064451419378',
 								            'view_count': int,
 								            'thumbnail': r're:^https?://.*',
 								            'timestamp': 1701975646,
 								        },
-												[ie/facebook] Support permalink URLs (#9061)

Authored by: kclauhk
											
										
										
											9 months ago
+								    }, {
 								        # data.node.comet_sections.content.story.attachments[].styles.attachment.media
 								        'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
 								        'info_dict': {
 								            'id': '270103405756416',
 								            'ext': 'mp4',
 								            'title': 'Lela Evans',
 								            'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
 								            'thumbnail': r're:^https?://.*',
 								            'uploader': 'Lela Evans',
 								            'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
 								            'upload_date': '20231228',
 								            'timestamp': 1703804085,
 								            'duration': 394.347,
 								            'view_count': int,
 								        },
-												[ie/Facebook] Add new ID format (#3824)

Closes #3496
Authored by: Wikidepia, kclauhk

Co-authored-by: kclauhk <78251477+kclauhk@users.noreply.github.com>
											
										
										
											10 months ago
+								    }, {
 								        'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
 								        'only_matching': True,
-												[facebook] Match video.php URLs

											
										
										
											10 years ago
+								    }, {
 								        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
 								        'only_matching': True,
-												[facebook] Extend _VALID_URL

											
										
										
											10 years ago
+								    }, {
 								        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
 								        'only_matching': True,
-												[facebook] Extend _VALID_URL take 2 (#5120)

											
										
										
											10 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.mediaset.currMedia.edges
-												[facebook] Extend _VALID_URL take 2 (#5120)

											
										
										
											10 years ago
+								        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
 								        'only_matching': True,
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video.story.attachments[].media
-												[facebook] Add shortcut and reformat _VALID_URL

											
										
										
											9 years ago
+								        'url': 'facebook:544765982287235',
 								        'only_matching': True,
-												[facebook] Support videos in groups

Viewing/Downloading videos in groups requires logging in, even for
those in public groups.

Fixes #6951.

											
										
										
											9 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
-												[facebook] Support videos in groups

Viewing/Downloading videos in groups requires logging in, even for
those in public groups.

Fixes #6951.

											
										
										
											9 years ago
+								        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
 								        'only_matching': True,
-												[facebook] Relax _VALID_URL (Closes #10151)

											
										
										
											8 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video.creation_story.attachments[].media
-												[facebook] Relax _VALID_URL (Closes #10151)

											
										
										
											8 years ago
+								        'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
 								        'only_matching': True,
-												[facebook] Recognize .onion URLs (closes #11443)

											
										
										
											8 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video
-												[facebook] Update onion URL (#788)

Authored by: Derkades
											
										
										
											3 years ago
+								        'url': 'https://www.facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd.onion/video.php?v=274175099429670',
-												[facebook] Recognize .onion URLs (closes #11443)

											
										
										
											8 years ago
+								        'only_matching': True,
-												[facebook] Make title optional (closes #12443)

											
										
										
											8 years ago
+								    }, {
 								        # no title
 								        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
 								        'only_matching': True,
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								    }, {
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.video
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
 								        'info_dict': {
 								            'id': '359649331226507',
 								            'ext': 'mp4',
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            'title': 'Fnatic vs. EG - Group A - Opening Match - ESL One Birmingham Day 1',
 								            'description': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
 								            'timestamp': 1527084179,
 								            'upload_date': '20180523',
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								            'uploader': 'ESL One Dota 2',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'uploader_id': '100066514874195',
 								            'duration': 4524.212,
 								            'view_count': int,
 								            'thumbnail': r're:^https?://.*',
 								            'concurrent_view_count': int,
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								        },
 								        'params': {
 								            'skip_download': True,
 								        },
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    }, {
 								        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
 								        'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
 								        'info_dict': {
 								            'id': '106560053808006',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'ext': 'mp4',
 								            'title': 'Josef',
 								            'thumbnail': r're:^https?://.*',
 								            'concurrent_view_count': int,
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								            'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'timestamp': 1549275572,
 								            'duration': 3.413,
 								            'uploader': 'Josef Novak',
 								            'description': '',
 								            'upload_date': '20190204',
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        },
 								    }, {
 								        # data.video.story.attachments[].media
 								        'url': 'https://www.facebook.com/watch/?v=647537299265662',
 								        'only_matching': True,
 								    }, {
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        # FIXME: https://github.com/yt-dlp/yt-dlp/issues/542
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
 								        'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
 								        'info_dict': {
 								            'id': '10157667649866271',
 								        },
 								        'playlist_count': 3,
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								        'skip': 'Requires logging in',
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    }, {
 								        # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
 								        'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
 								        'info_dict': {
 								            'id': '117576630041613',
 								            'ext': 'mp4',
 								            # TODO: title can be extracted from video page
 								            'title': 'Facebook video #117576630041613',
 								            'uploader_id': '189393014416438',
 								            'upload_date': '20201123',
 								            'timestamp': 1606162592,
 								        },
 								        'skip': 'Requires logging in',
 								    }, {
 								        # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
 								        'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
 								        'info_dict': {
 								            'id': '211567722618337',
 								            'ext': 'mp4',
 								            'title': 'Facebook video #211567722618337',
 								            'uploader_id': '127875227654254',
 								            'upload_date': '20161122',
 								            'timestamp': 1479793574,
 								        },
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        'skip': 'No video',
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    }, {
 								        # data.video.creation_story.attachments[].media
 								        'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://www.facebook.com/watchparty/211641140192478',
 								        'info_dict': {
 								            'id': '211641140192478',
 								        },
 								        'playlist_count': 1,
 								        'skip': 'Requires logging in',
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								    }, {
 								        # data.event.cover_media_renderer.cover_video
 								        'url': 'https://m.facebook.com/events/1509582499515440',
 								        'info_dict': {
 								            'id': '637246984455045',
 								            'ext': 'mp4',
 								            'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
 								            'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
 								            'thumbnail': r're:^https?://.*',
 								            'uploader': 'Comitato Liberi Pensatori',
 								            'uploader_id': '100065709540881',
 								        },
-												[facebook] Match video.php URLs

											
										
										
											10 years ago
+								    }]
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
 								    _api_config = {
 								        'graphURI': '/api/graphql/'
 								    }
-												Move Facebook into its own file

											
										
										
											12 years ago
-												[extractor] Add `_perform_login` function (#2943)

* Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API
* Adds `ie.supports_login` to the public API
											
										
										
											3 years ago
+								    def _perform_login(self, username, password):
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											1 year ago
+								        login_page_req = Request(self._LOGIN_URL)
-												[facebook] Fix authentication

											
										
										
											9 years ago
+								        self._set_cookie('facebook.com', 'locale', 'en_US')
-												[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

											
										
										
											11 years ago
+								        login_page = self._download_webpage(login_page_req, None,
-												PEP8: applied even more rules

											
										
										
											10 years ago
+								                                            note='Downloading login page',
 								                                            errnote='Unable to download login page')
-												[facebook] Fix login detection (#2505)

											
										
										
											11 years ago
+								        lsd = self._search_regex(
-												[facebook] Correct regexp

											
										
										
											11 years ago
+								            r'<input type="hidden" name="lsd" value="([^"]*)"',
-												[facebook] Fix login detection (#2505)

											
										
										
											11 years ago
+								            login_page, 'lsd')
-												[facebook] Modernize

											
										
										
											11 years ago
+								        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
-												Move Facebook into its own file

											
										
										
											12 years ago
+								        login_form = {
-												[extractor] Add `_perform_login` function (#2943)

* Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API
* Adds `ie.supports_login` to the public API
											
										
										
											3 years ago
+								            'email': username,
-												Move Facebook into its own file

											
										
										
											12 years ago
+								            'pass': password,
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
+								            'lsd': lsd,
 								            'lgnrnd': lgnrnd,
 								            'next': 'http://facebook.com/home.php',
 								            'default_persistent': '0',
 								            'legacy_return': '1',
 								            'timezone': '-60',
 								            'trynum': '1',
-												Fix all PEP8 issues except E501

											
										
										
											10 years ago
+								        }
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											1 year ago
+								        request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
 								        request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
-												Move Facebook into its own file

											
										
										
											12 years ago
+								        try:
-												[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

											
										
										
											11 years ago
+								            login_results = self._download_webpage(request, None,
-												PEP8: applied even more rules

											
										
										
											10 years ago
+								                                                   note='Logging in', errnote='unable to fetch login page')
-												Move Facebook into its own file

											
										
										
											12 years ago
+								            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
-												[facebook] Extract login error

											
										
										
											9 years ago
+								                error = self._html_search_regex(
 								                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
 								                    login_results, 'login error', default=None, group='error')
 								                if error:
 								                    raise ExtractorError('Unable to login: %s' % error, expected=True)
-												Fix inconsistent use of `report_warning`

											
										
										
											4 years ago
+								                self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
-												Move Facebook into its own file

											
										
										
											12 years ago
+								                return
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
-												[facebook] Fix authentication

											
										
										
											9 years ago
+								            fb_dtsg = self._search_regex(
 								                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
 								            h = self._search_regex(
 								                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
 								            if not fb_dtsg or not h:
 								                return
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
+								            check_form = {
-												[facebook] Fix authentication

											
										
										
											9 years ago
+								                'fb_dtsg': fb_dtsg,
 								                'h': h,
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
+								                'name_action_selected': 'dont_save',
 								            }
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											1 year ago
+								            check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
 								            check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
-												[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

											
										
										
											11 years ago
+								            check_response = self._download_webpage(check_req, None,
-												PEP8: applied even more rules

											
										
										
											10 years ago
+								                                                    note='Confirming login')
-												[facebook] Fix the login process (fixes #1244)

											
										
										
											11 years ago
+								            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
-												Fix inconsistent use of `report_warning`

											
										
										
											4 years ago
+								                self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
-												[utils] Add `network_exceptions`

											
										
										
											4 years ago
+								        except network_exceptions as err:
-												Fix inconsistent use of `report_warning`

											
										
										
											4 years ago
+								            self.report_warning('unable to log in: %s' % error_to_compat_str(err))
-												Move Facebook into its own file

											
										
										
											12 years ago
+								            return
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								    def _extract_from_url(self, url, video_id):
 								        webpage = self._download_webpage(
 								            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
-												Move Facebook into its own file

											
										
										
											12 years ago
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        def extract_metadata(webpage):
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								                r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            post = traverse_obj(post_data, (
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
-												bugfix for a44ca5a470e09b5170fc9c3a46733f050fadbfae, 19a0394044bfad36cd665450271b8eb048a41c02, 77f9033095cd8e1092a80db67f2b577cf13f95a8

Closes #3472

											
										
										
											3 years ago
+								            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
 								                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
-												[utils] Add `get_first`

											
										
										
											3 years ago
+								            title = get_first(media, ('title', 'text'))
 								            description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            page_title = title or self._html_search_regex((
-												[facebook] Improve title and uploader extraction

Closes #1943, closes #795

											
										
										
											3 years ago
+								                r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
 								                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								                self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>'
-												[facebook] Improve title and uploader extraction

Closes #1943, closes #795

											
										
										
											3 years ago
+								            ), webpage, 'title', default=None, group='content')
 								            description = description or self._html_search_meta(
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								                ['description', 'og:description', 'twitter:description'],
 								                webpage, 'description', default=None)
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								            uploader_data = (
 								                get_first(media, ('owner', {dict}))
-												[ie/facebook] Improve extraction

Partially addresses #4311

Authored by: jingtra, ringus1

Co-authored-by: Jing Kjeldsen <jingtra@gmail.com>

											
										
										
											9 months ago
+								                or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								                or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
 								                or get_first(post, ('node', 'actors', ..., {dict}))
 								                or get_first(post, ('event', 'event_creator', {dict})) or {})
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            uploader = uploader_data.get('name') or (
-												[facebook] Improve title and uploader extraction

Closes #1943, closes #795

											
										
										
											3 years ago
+								                clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
 								                or self._search_regex(
 								                    (r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            timestamp = int_or_none(self._search_regex(
 								                r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
 								                'timestamp', default=None))
 								            thumbnail = self._html_search_meta(
 								                ['og:image', 'twitter:image'], webpage, 'thumbnail', default=None)
 								            # some webpages contain unretrievable thumbnail urls
 								            # like https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10155168902769113&get_thumbnail=1
 								            # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
 								            if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
 								                thumbnail = None
 								            info_dict = {
 								                'description': description,
 								                'uploader': uploader,
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								                'uploader_id': uploader_data.get('id'),
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								                'timestamp': timestamp,
 								                'thumbnail': thumbnail,
-												[extractor/facebook] Fix metadata extraction (#6856)

Closes #3432
Authored by: ringus1
											
										
										
											2 years ago
+								                'view_count': parse_count(self._search_regex(
 								                    (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
 								                    webpage, 'view count', default=None)),
 								                'concurrent_view_count': get_first(post, (
 								                    ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            }
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            info_json_ld = self._search_json_ld(webpage, video_id, default={})
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            info_json_ld['title'] = (re.sub(r'\s*\|\s*Facebook$', '', title or info_json_ld.get('title') or page_title or '')
 								                                     or (description or '').replace('\n', ' ') or f'Facebook video #{video_id}')
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								            return merge_dicts(info_json_ld, info_dict)
-												[facebook] Support alternative webpage form

Fixes #8371

											
										
										
											9 years ago
+								        video_data = None
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								        def extract_video_data(instances):
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            video_data = []
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								            for item in instances:
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								                    video_item = item[2][0]
-												[facebook] Relax video id matching (closes #11017, closes #12055, closes #12056)

											
										
										
											8 years ago
+								                    if video_item.get('video_id'):
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                        video_data.append(video_item['videoData'])
 								            return video_data
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        server_js_data = self._parse_json(self._search_regex(
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
 								            webpage, 'server js data', default='{}'), video_id, fatal=False)
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
 								        if server_js_data:
 								            video_data = extract_video_data(server_js_data.get('instances', []))
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								        def extract_from_jsmods_instances(js_data):
 								            if js_data:
 								                return extract_video_data(try_get(
 								                    js_data, lambda x: x['jsmods']['instances'], list) or [])
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        def extract_dash_manifest(video, formats):
 								            dash_manifest = video.get('dash_manifest')
 								            if dash_manifest:
 								                formats.extend(self._parse_mpd_formats(
-												[ie/facebook] Add dash manifest URL (#7743)

Fixes #7742
Authored by: ringus1
											
										
										
											1 year ago
+								                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
 								                    mpd_url=video.get('dash_manifest_url')))
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
-												[extractor] Deprecate `_sort_formats`

											
										
										
											2 years ago
+								        def process_formats(info):
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            # Downloads with browser's User-Agent are rate limited. Working around
 								            # with non-browser User-Agent.
-												[extractor] Deprecate `_sort_formats`

											
										
										
											2 years ago
+								            for f in info['formats']:
-												[ie/facebook] Set format HTTP chunk size (#9058)

Closes #8197
Authored by: bashonly, kclauhk
											
										
										
											9 months ago
+								                # Downloads with browser's User-Agent are rate limited. Working around
 								                # with non-browser User-Agent.
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
-												[ie/facebook] Set format HTTP chunk size (#9058)

Closes #8197
Authored by: bashonly, kclauhk
											
										
										
											9 months ago
+								                # Formats larger than ~500MB will return error 403 unless chunk size is regulated
 								                f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
 								        def extract_relay_data(_filter):
 								            return self._parse_json(self._search_regex(
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								                r'data-sjs>({.*?%s.*?})</script>' % _filter,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
 								        def extract_relay_prefetched_data(_filter):
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								            return traverse_obj(extract_relay_data(_filter), (
 								                'require', (None, (..., ..., ..., '__bbox', 'require')),
-												[ie/facebook] Improve extraction

Partially addresses #4311

Authored by: jingtra, ringus1

Co-authored-by: Jing Kjeldsen <jingtra@gmail.com>

											
										
										
											9 months ago
+								                lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
 								                ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
-												[facebook] Fix extraction (closes #11926)

											
										
										
											8 years ago
+								        if not video_data:
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            server_js_data = self._parse_json(self._search_regex([
 								                r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
 								                r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
 								            ], webpage, 'js data', default='{}'), video_id, js_to_json, False)
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								            video_data = extract_from_jsmods_instances(server_js_data)
-												[facebook] Support alternative webpage form

Fixes #8371

											
										
										
											9 years ago
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								        if not video_data:
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            data = extract_relay_prefetched_data(
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            if data:
 								                entries = []
 								                def parse_graphql_video(video):
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								                    v_id = video.get('videoId') or video.get('id') or video_id
 								                    reel_info = traverse_obj(
 								                        video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
 								                    if reel_info:
 								                        video = video['creation_story']
 								                        video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
 								                        video.update(reel_info)
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                    formats = []
 								                    q = qualities(['sd', 'hd'])
-												[facebook] Parse dash manifests

											
										
										
											3 years ago
+								                    for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
-												[ie/facebook] Fix webpage extraction (#7890)

Closes #7901
Authored by: ringus1
											
										
										
											1 year ago
+								                                           ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
 								                                           ('browser_native_sd_url', 'sd')):
-												[facebook] Parse dash manifests

											
										
										
											3 years ago
+								                        playable_url = video.get(key)
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                        if not playable_url:
 								                            continue
-												[facebook] Parse dash manifests

											
										
										
											3 years ago
+								                        if determine_ext(playable_url) == 'mpd':
 								                            formats.extend(self._extract_mpd_formats(playable_url, video_id))
 								                        else:
 								                            formats.append({
 								                                'format_id': format_id,
-												[ie/facebook] Improve format sorting (#8074)

Authored by: fireattack
											
										
										
											1 year ago
+								                                # sd, hd formats w/o resolution info should be deprioritized below DASH
 								                                'quality': q(format_id) - 3,
-												[facebook] Parse dash manifests

											
										
										
											3 years ago
+								                                'url': playable_url,
 								                            })
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                    extract_dash_manifest(video, formats)
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
 								                    automatic_captions, subtitles = {}, {}
 								                    is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
 								                    for caption in traverse_obj(video, (
 								                        'video_available_captions_locales',
 								                        {lambda x: sorted(x, key=lambda c: c['locale'])},
 								                        lambda _, v: url_or_none(v['captions_url'])
 								                    )):
 								                        lang = caption.get('localized_language') or 'und'
 								                        subs = {
 								                            'url': caption['captions_url'],
 								                            'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
 								                        }
 								                        if caption.get('localized_creation_method') or is_broadcast:
 								                            automatic_captions.setdefault(caption['locale'], []).append(subs)
 								                        else:
 								                            subtitles.setdefault(caption['locale'], []).append(subs)
 								                    captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
 								                    if captions_url and not automatic_captions and not subtitles:
 								                        locale = self._html_search_meta(
 								                            ['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
 								                        (automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                    info = {
 								                        'id': v_id,
 								                        'formats': formats,
-												[facebook] Improve thumbnail extraction (#3392)

Authored by: Wikidepia
											
										
										
											3 years ago
+								                        'thumbnail': traverse_obj(
 								                            video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								                        'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
 								                        'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
 								                        'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
 								                                     or float_or_none(video.get('length_in_second'))),
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								                        'automatic_captions': automatic_captions,
 								                        'subtitles': subtitles,
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                    }
-												[extractor] Deprecate `_sort_formats`

											
										
										
											2 years ago
+								                    process_formats(info)
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                    description = try_get(video, lambda x: x['savable_description']['text'])
 								                    title = video.get('name')
 								                    if title:
 								                        info.update({
 								                            'title': title,
 								                            'description': description,
 								                        })
 								                    else:
 								                        info['title'] = description or 'Facebook video #%s' % v_id
 								                    entries.append(info)
 								                def parse_attachment(attachment, key='media'):
 								                    media = attachment.get(key) or {}
 								                    if media.get('__typename') == 'Video':
 								                        return parse_graphql_video(media)
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								                nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
 								                attachments = traverse_obj(nodes, (
 								                    ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
-												[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
											
										
										
											10 months ago
+								                    ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
 								                    'attachment', {dict}))
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								                for attachment in attachments:
-												[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
											
										
										
											10 months ago
+								                    ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
 								                                      ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								                    for n in ns:
 								                        parse_attachment(n)
 								                    parse_attachment(attachment)
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
 								                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
 								                for edge in edges:
 								                    parse_attachment(edge, key='node')
-												[ie/facebook] Support events (#9055)

Closes #5355
Authored by: kclauhk
											
										
										
											9 months ago
+								                video = traverse_obj(data, (
 								                    'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								                if video:
 								                    attachments = try_get(video, [
 								                        lambda x: x['story']['attachments'],
 								                        lambda x: x['creation_story']['attachments']
 								                    ], list) or []
 								                    for attachment in attachments:
 								                        parse_attachment(attachment)
 								                    if not entries:
 								                        parse_graphql_video(video)
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								                if len(entries) > 1:
 								                    return self.playlist_result(entries, video_id)
-												[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
											
										
										
											10 months ago
+								                video_info = entries[0] if entries else {'id': video_id}
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								                webpage_info = extract_metadata(webpage)
 								                # honor precise duration in video info
 								                if video_info.get('duration'):
 								                    webpage_info['duration'] = video_info['duration']
-												[ie/facebook] Improve thumbnail extraction (#9060)

Authored by: kclauhk
											
										
										
											9 months ago
+								                # preserve preferred_thumbnail in video info
 								                if video_info.get('thumbnail'):
 								                    webpage_info['thumbnail'] = video_info['thumbnail']
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								                return merge_dicts(webpage_info, video_info)
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
 								        if not video_data:
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
 								            if m_msg is not None:
 								                raise ExtractorError(
 								                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
 								                    expected=True)
-												Update to ytdl-commit-c2350ca

Update MSVC 2010 redist URL
https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2

											
										
										
											3 years ago
+								            elif any(p in webpage for p in (
 								                    '>You must log in to continue',
 								                    'id="login_form"',
 								                    'id="loginbutton"')):
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								                self.raise_login_required()
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        if not video_data and '/watchparty/' in url:
 								            post_data = {
 								                'doc_id': 3731964053542869,
 								                'variables': json.dumps({
 								                    'livingRoomID': video_id,
 								                }),
 								            }
 								            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
 								            if prefetched_data:
 								                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
 								                if lsd:
 								                    post_data[lsd['name']] = lsd['value']
 								            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
 								            for define in (relay_data.get('define') or []):
 								                if define[0] == 'RelayAPIConfigDefaults':
 								                    self._api_config = define[2]
 								            living_room = self._download_json(
 								                urljoin(url, self._api_config['graphURI']), video_id,
 								                data=urlencode_postdata(post_data))['data']['living_room']
 								            entries = []
 								            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
 								                video = try_get(edge, lambda x: x['node']['video']) or {}
 								                v_id = video.get('id')
 								                if not v_id:
 								                    continue
 								                v_id = compat_str(v_id)
 								                entries.append(self.url_result(
 								                    self._VIDEO_PAGE_TEMPLATE % v_id,
 								                    self.ie_key(), v_id, video.get('name')))
 								            return self.playlist_result(entries, video_id)
 								        if not video_data:
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								            # Video info not in first request, do a secondary request using
 								            # tahoe player specific URL
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								            tahoe_data = self._download_webpage(
 								                self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
 								                data=urlencode_postdata({
 								                    '__a': 1,
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								                    '__pc': self._search_regex(
 								                        r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
 								                        'pkg cohort', default='PHASED:DEFAULT'),
 								                    '__rev': self._search_regex(
 								                        r'client_revision["\']\s*:\s*(\d+),', webpage,
 								                        'client revision', default='3944515'),
-												[facebook] fix tahoe request for authenticated users(closes #16655)

											
										
										
											6 years ago
+								                    'fb_dtsg': self._search_regex(
 								                        r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
 								                        webpage, 'dtsg token', default=''),
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
+								                }),
 								                headers={
 								                    'Content-Type': 'application/x-www-form-urlencoded',
 								                })
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								            tahoe_js_data = self._parse_json(
 								                self._search_regex(
 								                    r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
 								                    'tahoe js data', default='{}'),
 								                video_id, fatal=False)
 								            video_data = extract_from_jsmods_instances(tahoe_js_data)
-												[facebook] Add support for tahoe player videos (closes #15441)

Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.

											
										
										
											7 years ago
-												[facebook] Support alternative webpage form

Fixes #8371

											
										
										
											9 years ago
+								        if not video_data:
-												[facebook] Improve extraction (closes #16554)

											
										
										
											7 years ago
+								            raise ExtractorError('Cannot parse data')
-												[facebook] Extract all the formats (closes #5037)

											
										
										
											10 years ago
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        if len(video_data) > 1:
 								            entries = []
 								            for v in video_data:
 								                video_url = v[0].get('video_url')
 								                if not video_url:
 								                    continue
 								                entries.append(self.url_result(urljoin(
 								                    url, video_url), self.ie_key(), v[0].get('video_id')))
 								            return self.playlist_result(entries, video_id)
 								        video_data = video_data[0]
-												[facebook] Extract all the formats (closes #5037)

											
										
										
											10 years ago
+								        formats = []
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        subtitles = {}
-												[facebook] Remove SWF params so that 1080P are detected

Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params

											
										
										
											8 years ago
+								        for f in video_data:
 								            format_id = f['stream_type']
-												Fix issue downloading facebook videos

youtube-dl expects the format items to be returned as a list,
but when there's only one item Facebook returns a dict instead,
this wraps the dict in a list if necessary

											
										
										
											8 years ago
+								            if f and isinstance(f, dict):
 								                f = [f]
-												[facebook] Fix extraction (Closes #7252)

											
										
										
											9 years ago
+								            if not f or not isinstance(f, list):
 								                continue
 								            for quality in ('sd', 'hd'):
 								                for src_type in ('src', 'src_no_ratelimit'):
 								                    src = f[0].get('%s_%s' % (quality, src_type))
 								                    if src:
-												[ie/facebook] Improve format sorting (#8074)

Authored by: fireattack
											
										
										
											1 year ago
+								                        # sd, hd formats w/o resolution info should be deprioritized below DASH
 								                        # TODO: investigate if progressive or src formats still exist
 								                        preference = -10 if format_id == 'progressive' else -3
-												[facebook] Fix format sorting

'hd' formats should have higher priorities

											
										
										
											9 years ago
+								                        if quality == 'hd':
-												[ie/facebook] Improve format sorting (#8074)

Authored by: fireattack
											
										
										
											1 year ago
+								                            preference += 1
-												[facebook] Fix extraction (Closes #7252)

											
										
										
											9 years ago
+								                        formats.append({
 								                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
 								                            'url': src,
-												[formatsort] Remove misuse of 'preference'

'preference' is to be used only when the format is better that ALL qualities of a lower preference irrespective of ANY sorting order the user requests. See deezer.py for correct use of this

In the older sorting method, `preference`, `quality` and `language_preference` were functionally almost equivalent. So these disparities doesn't really matter there

Also, despite what the documentation says, the default for `preference` was actually 0 and not -1. I have tried to correct this and also account for it when converting `preference` to `quality`

											
										
										
											4 years ago
+								                            'quality': preference,
-												[facebook] Fix format sorting
Closes #795

											
										
										
											3 years ago
+								                            'height': 720 if quality == 'hd' else None
-												[facebook] Fix extraction (Closes #7252)

											
										
										
											9 years ago
+								                        })
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								            extract_dash_manifest(f[0], formats)
-												[facebook] extract subtitles(closes #22777)

											
										
										
											5 years ago
+								            subtitles_src = f[0].get('subtitles_src')
 								            if subtitles_src:
 								                subtitles.setdefault('en', []).append({'url': subtitles_src})
-												Move Facebook into its own file

											
										
										
											12 years ago
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
+								        info_dict = {
-												Move Facebook into its own file

											
										
										
											12 years ago
+								            'id': video_id,
-												[facebook] Extract all the formats (closes #5037)

											
										
										
											10 years ago
+								            'formats': formats,
-												[facebook] extract subtitles(closes #22777)

											
										
										
											5 years ago
+								            'subtitles': subtitles,
-												Move Facebook into its own file

											
										
										
											12 years ago
+								        }
-												[extractor] Deprecate `_sort_formats`

											
										
										
											2 years ago
+								        process_formats(info_dict)
-												[facebook] Fix metadata extraction
Original PR: https://github.com/ytdl-org/youtube-dl/pull/29796
Closes #453, https://github.com/ytdl-org/youtube-dl/issues/29421, https://github.com/ytdl-org/youtube-dl/issues/23627, https://github.com/ytdl-org/youtube-dl/issues/23180, https://github.com/ytdl-org/youtube-dl/issues/14156

Authored by: kikuyan

											
										
										
											3 years ago
+								        info_dict.update(extract_metadata(webpage))
-												[facebook:post] Add extractor (Closes #8321)

											
										
										
											9 years ago
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        return info_dict
-												[facebook:post] Add extractor (Closes #8321)

											
										
										
											9 years ago
 								    def _real_extract(self, url):
-												[facebook] Merge FacebookPostIE into FacebookIE

Fixes #8713

											
										
										
											9 years ago
+								        video_id = self._match_id(url)
 								        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
-												Update to ytdl-2021.01.03

											
										
										
											4 years ago
+								        return self._extract_from_url(real_url, video_id)
-												[facebook:plugins:video] Add extractor (Closes #10530)

											
										
										
											8 years ago
 								class FacebookPluginsVideoIE(InfoExtractor):
 								    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
 								    _TESTS = [{
 								        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
 								        'md5': '5954e92cdfe51fe5782ae9bda7058a07',
 								        'info_dict': {
 								            'id': '10154383743583686',
 								            'ext': 'mp4',
-												[facebook] Fix extraction from groups

Closes #2264, related: #2320

											
										
										
											3 years ago
+								            # TODO: Fix title, uploader
-												[facebook:plugins:video] Add extractor (Closes #10530)

											
										
										
											8 years ago
+								            'title': 'What to do during the haze?',
 								            'uploader': 'Gov.sg',
 								            'upload_date': '20160826',
 								            'timestamp': 1472184808,
 								        },
 								        'add_ie': [FacebookIE.ie_key()],
 								    }, {
 								        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
 								        'only_matching': True,
 								    }]
 								    def _real_extract(self, url):
 								        return self.url_result(
 								            compat_urllib_parse_unquote(self._match_id(url)),
 								            FacebookIE.ie_key())
-												[Facebook] Handle redirect URLs
Closes #1035

											
										
										
											3 years ago
 								class FacebookRedirectURLIE(InfoExtractor):
 								    IE_DESC = False  # Do not list
 								    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/flx/warn[/?]'
 								    _TESTS = [{
 								        'url': 'https://www.facebook.com/flx/warn/?h=TAQHsoToz&u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&s=1',
 								        'info_dict': {
 								            'id': 'pO8h3EaFRdo',
 								            'ext': 'mp4',
 								            'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 								            'description': 'md5:2d713ccbb45b686a1888397b2c77ca6b',
 								            'channel_id': 'UCGBpxWJr9FNOcFYA5GkKrMg',
 								            'playable_in_embed': True,
 								            'categories': ['Music'],
 								            'channel': 'Boiler Room',
 								            'uploader_id': 'brtvofficial',
 								            'uploader': 'Boiler Room',
 								            'tags': 'count:11',
 								            'duration': 3332,
 								            'live_status': 'not_live',
 								            'thumbnail': 'https://i.ytimg.com/vi/pO8h3EaFRdo/maxresdefault.jpg',
 								            'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg',
 								            'availability': 'public',
 								            'uploader_url': 'http://www.youtube.com/user/brtvofficial',
 								            'upload_date': '20150917',
 								            'age_limit': 0,
 								            'view_count': int,
 								            'like_count': int,
 								        },
 								        'add_ie': ['Youtube'],
 								        'params': {'skip_download': 'Youtube'},
 								    }]
 								    def _real_extract(self, url):
 								        redirect_url = url_or_none(parse_qs(url).get('u', [None])[-1])
 								        if not redirect_url:
 								            raise ExtractorError('Invalid facebook redirect URL', expected=True)
 								        return self.url_result(redirect_url)
-												[extractor/facebook] Add reel support (#4660)

Closes #4039 
Authored by: bashonly
											
										
										
											2 years ago
 								class FacebookReelIE(InfoExtractor):
 								    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
 								    IE_NAME = 'facebook:reel'
 								    _TESTS = [{
 								        'url': 'https://www.facebook.com/reel/1195289147628387',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								        'md5': 'f13dd37f2633595982db5ed8765474d3',
-												[extractor/facebook] Add reel support (#4660)

Closes #4039 
Authored by: bashonly
											
										
										
											2 years ago
+								        'info_dict': {
 								            'id': '1195289147628387',
 								            'ext': 'mp4',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
 								            'description': 'md5:22f03309b216ac84720183961441d8db',
 								            'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
 								            'uploader_id': '100040874179269',
 								            'duration': 9.579,
 								            'timestamp': 1637502609,
-												[extractor/facebook] Add reel support (#4660)

Closes #4039 
Authored by: bashonly
											
										
										
											2 years ago
+								            'upload_date': '20211121',
-												[ie/facebook:reel] Fix extraction (#7564)

Closes #7469
Authored by: demon071, bashonly
											
										
										
											1 year ago
+								            'thumbnail': r're:^https?://.*',
-												[extractor/facebook] Add reel support (#4660)

Closes #4039 
Authored by: bashonly
											
										
										
											2 years ago
+								        }
 								    }]
 								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
 								        return self.url_result(
 								            f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
-												[ie/facebook:ads] Add extractor (#8870)

Closes #8083
Authored by: kclauhk
											
										
										
											9 months ago
 								class FacebookAdsIE(InfoExtractor):
 								    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
 								    IE_NAME = 'facebook:ads'
 								    _TESTS = [{
 								        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
 								        'info_dict': {
 								            'id': '899206155126718',
 								            'ext': 'mp4',
 								            'title': 'video by Kandao',
 								            'uploader': 'Kandao',
 								            'uploader_id': '774114102743284',
 								            'uploader_url': r're:^https?://.*',
 								            'timestamp': 1702548330,
 								            'thumbnail': r're:^https?://.*',
 								            'upload_date': '20231214',
 								            'like_count': int,
 								        }
 								    }, {
 								        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
 								        'info_dict': {
 								            'id': '893637265423481',
 								            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
 								            'uploader': 'Eataly Paris Marais',
 								            'uploader_id': '2086668958314152',
 								            'uploader_url': r're:^https?://.*',
 								            'timestamp': 1703571529,
 								            'upload_date': '20231226',
 								            'like_count': int,
 								        },
 								        'playlist_count': 3,
 								    }, {
 								        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
 								        'only_matching': True,
 								    }]
 								    _FORMATS_MAP = {
 								        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
 								        'video_sd_url': ('sd', None),
 								        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
 								        'video_hd_url': ('hd', None),
 								    }
 								    def _extract_formats(self, video_dict):
 								        formats = []
 								        for format_key, format_url in traverse_obj(video_dict, (
 								            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
 								        )):
 								            formats.append({
 								                'format_id': self._FORMATS_MAP[format_key][0],
 								                'format_note': self._FORMATS_MAP[format_key][1],
 								                'url': format_url,
 								                'ext': 'mp4',
 								                'quality': qualities(tuple(self._FORMATS_MAP))(format_key),
 								            })
 								        return formats
 								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
 								        webpage = self._download_webpage(url, video_id)
 								        post_data = [self._parse_json(j, video_id, fatal=False)
 								                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
 								        data = traverse_obj(post_data, (
 								            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
 								        if not data:
 								            raise ExtractorError('Unable to extract ad data')
 								        title = data.get('title')
 								        if not title or title == '{{product.name}}':
 								            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
 								        info_dict = traverse_obj(data, {
 								            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
 								            'uploader': ('page_name', {str}),
 								            'uploader_id': ('page_id', {str_or_none}),
 								            'uploader_url': ('page_profile_uri', {url_or_none}),
 								            'timestamp': ('creation_time', {int_or_none}),
 								            'like_count': ('page_like_count', {int_or_none}),
 								        })
 								        entries = []
 								        for idx, entry in enumerate(traverse_obj(
 								            data, (('videos', 'cards'), lambda _, v: any([url_or_none(v[f]) for f in self._FORMATS_MAP]))), 1
 								        ):
 								            entries.append({
 								                'id': f'{video_id}_{idx}',
 								                'title': entry.get('title') or title,
 								                'description': entry.get('link_description') or info_dict.get('description'),
 								                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
 								                'formats': self._extract_formats(entry),
 								            })
 								        if len(entries) == 1:
 								            info_dict.update(entries[0])
 								        elif len(entries) > 1:
 								            info_dict.update({
 								                'title': entries[0]['title'],
 								                'entries': entries,
 								                '_type': 'playlist',
 								            })
 								        info_dict['id'] = video_id
 								        return info_dict