[pornhub] Fix extraction (closes #12007)

This commit is contained in:
Thomas Christlieb 2017-02-08 13:53:39 +01:00 committed by Sergey M․
parent 78ef214d2d
commit e64b0fca14
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 17 additions and 4 deletions

View File

@@ -156,11 +156,24 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_variables = {}
for video_variablename, quote, video_variable in re.findall(
r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage):
video_variables[video_variablename] = video_variable
encoded_video_urls = []
for encoded_video_url in re.findall(
r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage):
encoded_video_urls.append(encoded_video_url)
# Decode the URLs
video_urls = [] video_urls = []
for quote, video_url in re.findall( for url in encoded_video_urls:
r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage): for varname, varval in video_variables.items():
video_urls.append(compat_urllib_parse_unquote(re.sub( url = url.replace(varname, varval)
r'{0}\s*\+\s*{0}'.format(quote), '', video_url))) url = url.replace('+', '')
url = url.replace(' ', '')
video_urls.append(url)
if webpage.find('"encrypted":true') != -1: if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse_unquote_plus( password = compat_urllib_parse_unquote_plus(