mirror of https://github.com/blackjack4494/yt-dlc
[spankbang] Fix formats extraction (closes #15727)
This commit is contained in:
parent
b871d7e954
commit
7773a92800
|
@ -3,7 +3,12 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_duration,
|
||||||
|
parse_resolution,
|
||||||
|
str_to_int,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SpankBangIE(InfoExtractor):
|
class SpankBangIE(InfoExtractor):
|
||||||
|
@ -15,7 +20,7 @@ class SpankBangIE(InfoExtractor):
|
||||||
'id': '3vvn',
|
'id': '3vvn',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'fantasy solo',
|
'title': 'fantasy solo',
|
||||||
'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.',
|
'description': 'dillion harper masturbates on a bed',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'silly2587',
|
'uploader': 'silly2587',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -32,36 +37,49 @@ class SpankBangIE(InfoExtractor):
|
||||||
# mobile page
|
# mobile page
|
||||||
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
|
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# 4k
|
||||||
|
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id, headers={
|
||||||
|
'Cookie': 'country=US'
|
||||||
|
})
|
||||||
|
|
||||||
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s is not available' % video_id, expected=True)
|
'Video %s is not available' % video_id, expected=True)
|
||||||
|
|
||||||
stream_key = self._html_search_regex(
|
formats = []
|
||||||
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
|
for mobj in re.finditer(
|
||||||
webpage, 'stream key')
|
r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||||
|
webpage):
|
||||||
formats = [{
|
format_id, format_url = mobj.group('id', 'url')
|
||||||
'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
|
f = parse_resolution(format_id)
|
||||||
'ext': 'mp4',
|
f.update({
|
||||||
'format_id': '%sp' % height,
|
'url': format_url,
|
||||||
'height': int(height),
|
'format_id': format_id,
|
||||||
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
|
})
|
||||||
self._check_formats(formats, video_id)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||||
description = self._og_search_description(webpage)
|
description = self._search_regex(
|
||||||
|
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._search_regex(
|
uploader = self._search_regex(
|
||||||
r'class="user"[^>]*><img[^>]+>([^<]+)',
|
r'class="user"[^>]*><img[^>]+>([^<]+)',
|
||||||
webpage, 'uploader', default=None)
|
webpage, 'uploader', default=None)
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
|
@ -71,6 +89,8 @@ class SpankBangIE(InfoExtractor):
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue