#86 [youtube_live_chat] Use POST API (Closes #82)

YouTube has removed support for the old GET-based live chat API, which now returns 404, so the live chat replay downloader is switched to the POST API.
Authored by siikamiika
2021-02-15 11:57:21 +02:00 · 2021-02-15 11:57:21 +02:00 · 273762c8d0
parent 7620cd46c3
commit 273762c8d0
3 changed files with 45 additions and 38 deletions
--- a/youtube_dlc/downloader/fragment.py
+++ b/youtube_dlc/downloader/fragment.py
@ -95,11 +95,12 @@ class FragmentFD(FileDownloader):
        frag_index_stream.write(json.dumps({'downloader': downloader}))
        frag_index_stream.close()

-    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+    def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        fragment_info_dict = {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
+            'request_data': request_data,
        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
--- a/youtube_dlc/downloader/http.py
+++ b/youtube_dlc/downloader/http.py
@ -27,6 +27,7 @@ from ..utils import (
 class HttpFD(FileDownloader):
    def real_download(self, filename, info_dict):
        url = info_dict['url']
+        request_data = info_dict.get('request_data', None)

        class DownloadContext(dict):
            __getattr__ = dict.get
@ -101,7 +102,7 @@ class HttpFD(FileDownloader):
                range_end = ctx.data_len - 1
            has_range = range_start is not None
            ctx.has_range = has_range
-            request = sanitized_Request(url, None, headers)
+            request = sanitized_Request(url, request_data, headers)
            if has_range:
                set_range(request, range_start, range_end)
            # Establish connection
@ -152,7 +153,7 @@ class HttpFD(FileDownloader):
                    try:
                        # Open the connection again without the range header
                        ctx.data = self.ydl.urlopen(
-                            sanitized_Request(url, None, headers))
+                            sanitized_Request(url, request_data, headers))
                        content_length = ctx.data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
--- a/youtube_dlc/downloader/youtube_live_chat.py
+++ b/youtube_dlc/downloader/youtube_live_chat.py
@ -1,11 +1,13 @@
 from __future__ import division, unicode_literals

-import re
 import json

 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import try_get
+from ..utils import (
+    try_get,
+    RegexNotFoundError,
+)
 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE


@ -27,40 +29,28 @@ class YoutubeLiveChatReplayFD(FragmentFD):
            'total_frags': None,
        }

-        def dl_fragment(url):
-            headers = info_dict.get('http_headers', {})
-            return self._download_fragment(ctx, url, info_dict, headers)
+        ie = YT_BaseIE(self.ydl)

-        def parse_yt_initial_data(data):
-            patterns = (
-                r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
-                r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
-            data = data.decode('utf-8', 'replace')
-            for patt in patterns:
-                try:
-                    raw_json = re.search(patt, data).group(1)
-                    return json.loads(raw_json)
-                except AttributeError:
-                    continue
+        def dl_fragment(url, data=None, headers=None):
+            http_headers = info_dict.get('http_headers', {})
+            if headers:
+                http_headers = http_headers.copy()
+                http_headers.update(headers)
+            return self._download_fragment(ctx, url, info_dict, http_headers, data)

-        def download_and_parse_fragment(url, frag_index):
+        def download_and_parse_fragment(url, frag_index, request_data):
            count = 0
            while count <= fragment_retries:
                try:
-                    success, raw_fragment = dl_fragment(url)
+                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
                    if not success:
                        return False, None, None
-                    data = parse_yt_initial_data(raw_fragment)
+                    try:
+                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+                    except RegexNotFoundError:
+                        data = None
                    if not data:
-                        raw_data = json.loads(raw_fragment)
-                        # sometimes youtube replies with a list
-                        if not isinstance(raw_data, list):
-                            raw_data = [raw_data]
-                        try:
-                            data = next(item['response'] for item in raw_data if 'response' in item)
-                        except StopIteration:
-                            data = {}
-
+                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@ -93,22 +83,37 @@ class YoutubeLiveChatReplayFD(FragmentFD):
            'https://www.youtube.com/watch?v={}'.format(video_id))
        if not success:
            return False
-        data = parse_yt_initial_data(raw_fragment)
+        try:
+            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+        except RegexNotFoundError:
+            return False
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

+        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+        if not ytcfg:
+            return False
+        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+        if not api_key or not innertube_context:
+            return False
+        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+
        frag_index = offset = 0
        while continuation_id is not None:
            frag_index += 1
-            url = ''.join((
-                'https://www.youtube.com/live_chat_replay',
-                '/get_live_chat_replay' if frag_index > 1 else '',
-                '?continuation=%s' % continuation_id,
-                '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else ''))
-            success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
+            request_data = {
+                'context': innertube_context,
+                'continuation': continuation_id,
+            }
+            if frag_index > 1:
+                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+            success, continuation_id, offset = download_and_parse_fragment(
+                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
            if not success:
                return False
            if test: