forked from minhngoc25a/yt-dlc
Extract the original URL from the next_url parameter of the verify_age page before the actual extraction
This commit is contained in:
parent
d891ff9fd9
commit
69d3b2d824
12
youtube-dl
12
youtube-dl
|
@ -1171,7 +1171,9 @@ class InfoExtractor(object):
|
||||||
class YoutubeIE(InfoExtractor):
|
class YoutubeIE(InfoExtractor):
|
||||||
"""Information extractor for youtube.com."""
|
"""Information extractor for youtube.com."""
|
||||||
|
|
||||||
_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
|
_PREFIX = r'(?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)'
|
||||||
|
_VALID_URL = r'^('+_PREFIX+r'(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
|
||||||
|
# Matches age-verification URLs: group 1 is the site prefix, group 2 the (still percent-encoded) next_url value.
# Both literal fragments are raw strings so that the regex escape \? is not treated as a string escape.
_VALID_URL_WITH_AGE = r'^(' + _PREFIX + r')verify_age\?next_url=([^&]+)(?:.+)?$'
|
||||||
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||||
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
|
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
|
||||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||||
|
@ -1335,6 +1337,14 @@ class YoutubeIE(InfoExtractor):
|
||||||
return
|
return
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
# Extract original video URL from URL with age verification, using next_url parameter
|
||||||
|
mobj = re.match(self._VALID_URL_WITH_AGE, url)
|
||||||
|
if mobj:
|
||||||
|
# Percent-decode x: replace each %XX escape (XX = two hexadecimal digits) with the character it encodes.
# Only 0-9, a-f and A-F are hex digits; anything else (e.g. "%GH") is left untouched instead of crashing int(..., 16).
urldecode = lambda x: re.sub(r'%([0-9a-fA-F]{2})', lambda m: chr(int(m.group(1), 16)), x)
|
||||||
|
# Keep the original domain. We could probably switch to www.youtube.com, but keeping it should not hurt.
|
||||||
|
# Strip the leading slash from next_url so that appending it to the prefix does not produce a double slash (//) in the URL.
|
||||||
|
url = mobj.group(1) + re.sub(r'^/', '', urldecode(mobj.group(2)))
|
||||||
|
|
||||||
# Extract video id from URL
|
# Extract video id from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|
Loading…
Reference in New Issue