From 579c99a284481243f30e80151c90a753f613778d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 22 Mar 2017 23:48:06 +0700 Subject: [PATCH] [cloudy] Fix extraction (closes #12525) --- youtube_dl/extractor/cloudy.py | 113 +++++++++++---------------------- 1 file changed, 36 insertions(+), 77 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index ae5ba0015..9bc8dbea4 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -1,97 +1,56 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_HTTPError, -) from ..utils import ( - ExtractorError, - HEADRequest, - remove_end, + str_to_int, + unified_strdate, ) class CloudyIE(InfoExtractor): _IE_DESC = 'cloudy.ec' - _VALID_URL = r'''(?x) - https?://(?:www\.)?cloudy\.ec/ - (?:v/|embed\.php\?id=) - (?P[A-Za-z0-9]+) - ''' - _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s' - _API_URL = 'http://www.cloudy.ec/api/player.api.php' - _MAX_TRIES = 2 - _TEST = { + _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P[A-Za-z0-9]+)' + _TESTS = [{ 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', + 'md5': '29832b05028ead1b58be86bf319397ca', 'info_dict': { 'id': 'af511e2527aac', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Funny Cats and Animals Compilation june 2013', + 'upload_date': '20130913', + 'view_count': int, } - } - - def _extract_video(self, video_id, file_key, error_url=None, try_num=0): - - if try_num > self._MAX_TRIES - 1: - raise ExtractorError('Unable to extract video URL', expected=True) - - form = { - 'file': video_id, - 'key': file_key, - } - - if error_url: - form.update({ - 'numOfErrors': try_num, - 'errorCode': '404', - 'errorUrl': error_url, - }) - - player_data = self._download_webpage( - self._API_URL, video_id, 'Downloading player data', query=form) - data = compat_parse_qs(player_data) - - try_num += 1 - - if 'error' in data: - raise ExtractorError( - '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), - expected=True) - - title = data.get('title', [None])[0] - if title: - title = remove_end(title, '&asdasdas').strip() - - video_url = data.get('url', [None])[0] - - if video_url: - try: - self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: - self.report_warning('Invalid video URL, requesting another', video_id) - return self._extract_video(video_id, file_key, video_url, try_num) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - } + }, { + 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - url = self._EMBED_URL % video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id) - file_key = self._search_regex( - [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], - webpage, 'file_key') + info = self._parse_html5_media_entries(url, webpage, video_id)[0] - return self._extract_video(video_id, file_key) + webpage = self._download_webpage( + 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) + + if webpage: + info.update({ + 'title': self._search_regex( + r']*>([^<]+)<', webpage, 'title'), + 'upload_date': unified_strdate(self._search_regex( + r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, + 'upload date', fatal=False)), + 'view_count': str_to_int(self._search_regex( + r'([\d,.]+) views<', webpage, 'view count', fatal=False)), + }) + + if not info.get('title'): + info['title'] = video_id + + info['id'] = video_id + + return info