[youtube] INNERTUBE_CONTEXT regex adjustment

[youtube] post entire client context to api endpoint
[youtube] stop loading pages if videos are already seen
2020-11-10 22:49:55 -08:00 · 2020-11-10 21:44:22 -08:00 · 2020-11-10 14:39:38 -08:00 · 2020-11-10 06:39:03 -08:00 · 2020-11-10 06:14:25 -08:00 · 2020-11-10 04:39:04 -08:00
1 changed file with 61 additions and 0 deletions
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -36,6 +36,7 @@ from ..utils import (
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
+    js_to_json,
    mimetype2ext,
    orderedSet,
    parse_codecs,
@@ -2891,6 +2892,66 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

+        yt_initial = self._get_yt_initial_data('', page)
+        if yt_initial:
+            playlist_items = try_get(yt_initial, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'], list)
+            entries = []
+            playlist_page = 1
+            api_key = self._search_regex(
+                r'"INNERTUBE_API_KEY":"([^"]+)"',
+                page, 'api key', default="AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", fatal=False)
+            ytcfg_string = self._search_regex(
+                r'ytcfg\.set\(({.*?"INNERTUBE_CONTEXT".*?})\);',
+                page, 'client context')
+            api_client_context = self._parse_json(ytcfg_string, 'client context', transform_source=js_to_json)['INNERTUBE_CONTEXT']
+            while playlist_items:
+                item = playlist_items.pop(0)
+
+                item_video = try_get(item, lambda x: x['playlistVideoRenderer'], dict)
+                if item_video:
+                    video_id = try_get(item_video, lambda x: x['videoId'], compat_str)
+                    if not video_id:
+                        continue
+                    entry = {
+                        '_type': 'url',
+                        'duration': int_or_none(try_get(item_video, lambda x: x['lengthSeconds'], compat_str)),
+                        'id': video_id,
+                        'ie_key': 'Youtube',
+                        # 'thumbnails': try_get(item_video, lambda x: x['thumbnail']['thumbnails'], list),
+                        'title': try_get(item_video, lambda x: x['title']['runs'][0]['text'], compat_str),
+                        'url': video_id
+                    }
+                    entries.append(entry)
+
+                item_continue = try_get(item, lambda x: x['continuationItemRenderer'], dict)
+                if item_continue:
+                    playlist_page += 1
+                    continuation_token = try_get(item_continue, lambda x: x['continuationEndpoint']['continuationCommand']['token'], compat_str)
+                    request_data = {
+                        'context': api_client_context,
+                        'continuation': continuation_token
+                    }
+                    response = self._download_json(
+                        'https://www.youtube.com/youtubei/v1/browse?key=%s' % api_key,
+                        data=json.dumps(request_data).encode('utf8'),
+                        errnote='Unable to download playlist page', fatal=False,
+                        headers={'Content-Type': 'application/json'},
+                        note='Downloading page %s' % playlist_page,
+                        video_id=playlist_id)
+                    playlist_items_new = try_get(response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
+                    if playlist_items_new:
+                        playlist_items.extend(playlist_items_new)
+
+            playlist_title = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['title'], compat_str)
+            playlist_description = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['description'], compat_str)
+            playlist = self.playlist_result(
+                entries,
+                playlist_id=playlist_id,
+                playlist_title=playlist_title,
+                playlist_description=playlist_description)
+            has_videos = bool(entries)
+            return has_videos, playlist
+
        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
Author	SHA1	Message	Date
insaneracist	63afc7936d	[youtube] INNERTUBE_CONTEXT regex adjustment	2020-11-10 22:49:55 -08:00
insaneracist	2fd829049c	[youtube] post entire client context to api endpoint	2020-11-10 21:44:22 -08:00
insaneracist	29e9c94948	[youtube] stop loading pages if videos are already seen	2020-11-10 14:39:38 -08:00
insaneracist	965a404be3	[youtube] poking github	2020-11-10 06:39:03 -08:00
insaneracist	b2a462a24c	[youtube] use api key and client version from page	2020-11-10 06:14:25 -08:00
insaneracist	0137a782cf	[youtube] playlist title, desc	2020-11-10 04:39:04 -08:00
insaneracist	fc988a14e8	[youtube] fix: playlist	2020-11-10 00:36:01 -08:00