forked from minhngoc25a/yt-dlc
[pornhub] Fix comment count extraction (Closes #5320)
This commit is contained in:
parent
72b249bf1f
commit
7700207ec7
|
@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
|
return str_to_int(self._search_regex(
|
||||||
if count:
|
pattern, webpage, '%s count' % name, fatal=False))
|
||||||
count = str_to_int(count)
|
|
||||||
return count
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
|
||||||
if thumbnail:
|
if thumbnail:
|
||||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||||
|
|
||||||
view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
view_count = self._extract_count(
|
||||||
like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||||
dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
like_count = self._extract_count(
|
||||||
|
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||||
|
dislike_count = self._extract_count(
|
||||||
|
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
|
|
Loading…
Reference in New Issue