forked from minhngoc25a/yt-dlc
Merge pull request #12 from siikamiika/youtube-live-chat
Youtube live chat
This commit is contained in:
commit
a9c069012f
|
@ -1805,6 +1805,14 @@ class YoutubeDL(object):
|
||||||
self.report_error('Cannot write annotations file: ' + annofn)
|
self.report_error('Cannot write annotations file: ' + annofn)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def dl(name, info):
    """Download ``info`` to the file ``name`` and return the downloader's result.

    The downloader class is picked by ``get_suitable_downloader`` from
    ``info`` and the current parameters; every registered progress hook
    is attached to it before the download starts.
    """
    downloader_cls = get_suitable_downloader(info, self.params)
    downloader = downloader_cls(self, self.params)
    for hook in self._progress_hooks:
        downloader.add_progress_hook(hook)
    if self.params.get('verbose'):
        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
    return downloader.download(name, info)
|
||||||
|
|
||||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||||
self.params.get('writeautomaticsub')])
|
self.params.get('writeautomaticsub')])
|
||||||
|
|
||||||
|
@ -1812,14 +1820,12 @@ class YoutubeDL(object):
|
||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['requested_subtitles']
|
subtitles = info_dict['requested_subtitles']
|
||||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
|
||||||
for sub_lang, sub_info in subtitles.items():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub_format = sub_info['ext']
|
sub_format = sub_info['ext']
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||||
else:
|
else:
|
||||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
|
||||||
if sub_info.get('data') is not None:
|
if sub_info.get('data') is not None:
|
||||||
try:
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
# Use newline='' to prevent conversion of newline characters
|
||||||
|
@ -1831,11 +1837,11 @@ class YoutubeDL(object):
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
dl(sub_filename, sub_info)
|
||||||
sub_info['url'], info_dict['id'], note=False).read()
|
except (ExtractorError, IOError, OSError, ValueError,
|
||||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
compat_urllib_error.URLError,
|
||||||
subfile.write(sub_data)
|
compat_http_client.HTTPException,
|
||||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
socket.error) as err:
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
(sub_lang, error_to_compat_str(err)))
|
(sub_lang, error_to_compat_str(err)))
|
||||||
continue
|
continue
|
||||||
|
@ -1856,14 +1862,6 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
try:
|
try:
|
||||||
def dl(name, info):
|
|
||||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
|
||||||
for ph in self._progress_hooks:
|
|
||||||
fd.add_progress_hook(ph)
|
|
||||||
if self.params.get('verbose'):
|
|
||||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
|
||||||
return fd.download(name, info)
|
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
downloaded = []
|
downloaded = []
|
||||||
success = True
|
success = True
|
||||||
|
|
|
@ -8,6 +8,7 @@ from .rtmp import RtmpFD
|
||||||
from .dash import DashSegmentsFD
|
from .dash import DashSegmentsFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
|
from .youtube_live_chat import YoutubeLiveChatReplayFD
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
|
@ -26,6 +27,7 @@ PROTOCOL_MAP = {
|
||||||
'f4m': F4mFD,
|
'f4m': F4mFD,
|
||||||
'http_dash_segments': DashSegmentsFD,
|
'http_dash_segments': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
|
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .fragment import FragmentFD
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeLiveChatReplayFD(FragmentFD):
    """Download a YouTube live chat replay fragment by fragment.

    The watch page is fetched first to find the initial chat continuation
    token; the replay is then paged through until a page contains no timed
    chat item.  Every action is appended to the output file as one UTF-8
    encoded JSON document per line.
    """

    FD_NAME = 'youtube_live_chat_replay'

    def real_download(self, filename, info_dict):
        """Write the chat replay of ``info_dict['video_id']`` to ``filename``.

        Returns True once the replay has been written.  Returns False when a
        fragment cannot be downloaded, or (after reporting an error) when a
        response does not have the expected structure.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        def dl_fragment(url):
            headers = info_dict.get('http_headers', {})
            return self._download_fragment(ctx, url, info_dict, headers)

        def parse_yt_initial_data(data):
            """Return the ytInitialData object embedded in ``data``, or None."""
            window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
            var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
            for patt in window_patt, var_patt:
                mobj = re.search(patt, data)
                if mobj is None:
                    continue
                try:
                    # Decode explicitly: json.loads() only accepts bytes
                    # starting with Python 3.6.
                    return json.loads(mobj.group(1).decode('utf-8'))
                except ValueError:
                    # The non-greedy match may have stopped too early (or
                    # the payload is not UTF-8); try the next pattern.
                    continue
            return None

        def fail(what):
            self.report_error(
                '[%s] Unable to extract %s' % (self.FD_NAME, what))
            return False

        self._prepare_and_start_frag_download(ctx)

        success, raw_fragment = dl_fragment(
            'https://www.youtube.com/watch?v={}'.format(video_id))
        if not success:
            return False
        data = parse_yt_initial_data(raw_fragment)
        try:
            continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        except (KeyError, IndexError, TypeError):
            # data is None or the page carries no live chat renderer
            return fail('initial live chat continuation')
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        first = True
        offset = None
        while continuation_id is not None:
            if first:
                url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                data = parse_yt_initial_data(raw_fragment)
            else:
                # Step back 5 seconds from the last seen item, but never
                # request a negative player offset.
                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
                       + '?continuation={}'.format(continuation_id)
                       + '&playerOffsetMs={}'.format(max(offset - 5000, 0))
                       + '&hidden=false'
                       + '&pbj=1')
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                try:
                    data = json.loads(raw_fragment.decode('utf-8'))['response']
                except (ValueError, KeyError, TypeError):
                    return fail('live chat replay response')

            first = False
            continuation_id = None

            try:
                live_chat_continuation = data['continuationContents']['liveChatContinuation']
            except (KeyError, TypeError):
                return fail('live chat continuation contents')

            offset = None
            processed_fragment = bytearray()
            if 'actions' in live_chat_continuation:
                for action in live_chat_continuation['actions']:
                    if 'replayChatItemAction' in action:
                        replay_chat_item_action = action['replayChatItemAction']
                        offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                    processed_fragment.extend(
                        json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
                try:
                    continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
                except (KeyError, IndexError, TypeError):
                    # No replay continuation on this page: leave
                    # continuation_id as None so the loop ends here.
                    continuation_id = None

            self._append_fragment(ctx, processed_fragment)

            # Stop after one page in test mode, or when the page held no
            # timed chat item to continue from.
            if test or offset is None:
                break

        self._finish_frag_download(ctx)

        return True
|
|
@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Signature extraction failed: ' + tb, cause=e)
|
'Signature extraction failed: ' + tb, cause=e)
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
|
||||||
try:
|
try:
|
||||||
subs_doc = self._download_xml(
|
subs_doc = self._download_xml(
|
||||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||||
|
@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
sub_lang_list[lang] = sub_formats
|
sub_lang_list[lang] = sub_formats
|
||||||
|
if has_live_chat_replay:
|
||||||
|
sub_lang_list['live_chat'] = [
|
||||||
|
{
|
||||||
|
'video_id': video_id,
|
||||||
|
'ext': 'json',
|
||||||
|
'protocol': 'youtube_live_chat_replay',
|
||||||
|
},
|
||||||
|
]
|
||||||
if not sub_lang_list:
|
if not sub_lang_list:
|
||||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
|
@ -1485,6 +1493,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
uppercase_escape(config), video_id, fatal=False)
|
uppercase_escape(config), video_id, fatal=False)
|
||||||
|
|
||||||
|
def _get_yt_initial_data(self, video_id, webpage):
    """Extract the ``ytInitialData`` object embedded in a watch page.

    Looks for either the ``window["ytInitialData"] = ...`` or the
    ``var ytInitialData = ...`` assignment in ``webpage`` and parses the
    captured text as JSON (non-fatally).  Returns None when neither
    assignment is found.
    """
    patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_data = self._search_regex(
        patterns, webpage, 'ytInitialData', default=None)
    if not raw_data:
        return None
    return self._parse_json(
        uppercase_escape(raw_data), video_id, fatal=False)
|
||||||
|
|
||||||
def _get_automatic_captions(self, video_id, webpage):
|
def _get_automatic_captions(self, video_id, webpage):
|
||||||
"""We need the webpage for getting the captions url, pass it as an
|
"""We need the webpage for getting the captions url, pass it as an
|
||||||
argument to speed up the process."""
|
argument to speed up the process."""
|
||||||
|
@ -1978,6 +1995,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = bool_or_none(video_details.get('isLive'))
|
is_live = bool_or_none(video_details.get('isLive'))
|
||||||
|
|
||||||
|
has_live_chat_replay = False
|
||||||
|
if not is_live:
|
||||||
|
yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
|
||||||
|
try:
|
||||||
|
yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
||||||
|
has_live_chat_replay = True
|
||||||
|
except (KeyError, IndexError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Check for "rental" videos
|
# Check for "rental" videos
|
||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
|
raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
|
||||||
|
@ -2385,7 +2411,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
|
or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
video_subtitles = self.extract_subtitles(
|
||||||
|
video_id, video_webpage, has_live_chat_replay)
|
||||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||||
|
|
||||||
video_duration = try_get(
|
video_duration = try_get(
|
||||||
|
|
Loading…
Reference in New Issue