Compare commits

...

7 Commits

Author SHA1 Message Date
insaneracist 63afc7936d [youtube] INNERTUBE_CONTEXT regex adjustment 2020-11-10 22:49:55 -08:00
insaneracist 2fd829049c [youtube] post entire client context to api endpoint 2020-11-10 21:44:22 -08:00
insaneracist 29e9c94948 [youtube] stop loading pages if videos are already seen 2020-11-10 14:39:38 -08:00
insaneracist 965a404be3 [youtube] poking github 2020-11-10 06:39:03 -08:00
insaneracist b2a462a24c [youtube] use api key and client version from page 2020-11-10 06:14:25 -08:00
insaneracist 0137a782cf [youtube] playlist title, desc 2020-11-10 04:39:04 -08:00
insaneracist fc988a14e8 [youtube] fix: playlist 2020-11-10 00:36:01 -08:00
1 changed file with 61 additions and 0 deletions

View File

@@ -36,6 +36,7 @@ from ..utils import (
get_element_by_attribute,
get_element_by_id,
int_or_none,
js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -2891,6 +2892,66 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
yt_initial = self._get_yt_initial_data('', page)
if yt_initial:
playlist_items = try_get(yt_initial, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'], list)
entries = []
playlist_page = 1
api_key = self._search_regex(
r'"INNERTUBE_API_KEY":"([^"]+)"',
page, 'api key', default="AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", fatal=False)
ytcfg_string = self._search_regex(
r'ytcfg\.set\(({.*?"INNERTUBE_CONTEXT".*?})\);',
page, 'client context')
api_client_context = self._parse_json(ytcfg_string, 'client context', transform_source=js_to_json)['INNERTUBE_CONTEXT']
while playlist_items:
item = playlist_items.pop(0)
item_video = try_get(item, lambda x: x['playlistVideoRenderer'], dict)
if item_video:
video_id = try_get(item_video, lambda x: x['videoId'], compat_str)
if not video_id:
continue
entry = {
'_type': 'url',
'duration': int_or_none(try_get(item_video, lambda x: x['lengthSeconds'], compat_str)),
'id': video_id,
'ie_key': 'Youtube',
# 'thumbnails': try_get(item_video, lambda x: x['thumbnail']['thumbnails'], list),
'title': try_get(item_video, lambda x: x['title']['runs'][0]['text'], compat_str),
'url': video_id
}
entries.append(entry)
item_continue = try_get(item, lambda x: x['continuationItemRenderer'], dict)
if item_continue:
playlist_page += 1
continuation_token = try_get(item_continue, lambda x: x['continuationEndpoint']['continuationCommand']['token'], compat_str)
request_data = {
'context': api_client_context,
'continuation': continuation_token
}
response = self._download_json(
'https://www.youtube.com/youtubei/v1/browse?key=%s' % api_key,
data=json.dumps(request_data).encode('utf8'),
errnote='Unable to download playlist page', fatal=False,
headers={'Content-Type': 'application/json'},
note='Downloading page %s' % playlist_page,
video_id=playlist_id)
playlist_items_new = try_get(response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
if playlist_items_new:
playlist_items.extend(playlist_items_new)
playlist_title = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['title'], compat_str)
playlist_description = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['description'], compat_str)
playlist = self.playlist_result(
entries,
playlist_id=playlist_id,
playlist_title=playlist_title,
playlist_description=playlist_description)
has_videos = bool(entries)
return has_videos, playlist
# the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
match = match.strip()