From 6ff5f1221858a4ad815bfb8a2351be2aafba7e16 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Apr 2014 00:05:43 +0200 Subject: [PATCH] [motorsport] Add extractor (Fixes #2688) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/motorsport.py | 63 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/motorsport.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c9c400b61..12c10d962 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -154,6 +154,7 @@ from .mixcloud import MixcloudIE from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE +from .motorsport import MotorsportIE from .mtv import ( MTVIE, MTVIggyIE, diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py new file mode 100644 index 000000000..dc727be10 --- /dev/null +++ b/youtube_dl/extractor/motorsport.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hashlib +import json +import re +import time + +from .common import InfoExtractor +from ..utils import ( + compat_parse_qs, + compat_str, + int_or_none, +) + + +class MotorsportIE(InfoExtractor): + IE_DESC = 'motorsport.com' + _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])' + _TEST = { + 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', + 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4', + 'info_dict': { + 'id': '7063', + 'ext': 'mp4', + 'title': 'Red Bull Racing: 2014 Rules Explained', + 'duration': 207, + 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', + 'uploader': 'rainiere', + 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' + } + } + + def _real_extract(self, url): + mobj = 
re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + flashvars_code = self._html_search_regex( + r'Video by: (.*?)', webpage, + 'uploader', fatal=False) + + return { + 'id': params['video_id'], + 'display_id': display_id, + 'title': params['title'], + 'url': video_url, + 'description': params.get('description'), + 'thumbnail': params.get('main_thumb'), + 'duration': int_or_none(params.get('duration')), + 'uploader': uploader, + }