Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport

2015-12-21 11:18:32 +01:00 · 2015-12-21 11:18:32 +01:00 · 2c28da8e05
parent 7ba71e30fb c7fa5fa42c
commit 2c28da8e05
5 changed files with 218 additions and 118 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .bleacherreport import (
    BleacherReportIE,
    BleacherReportCMSIE,
 )
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bpb import BpbIE
--- a/youtube_dl/extractor/amp.py
+++ b/youtube_dl/extractor/amp.py
@ -0,0 +1,84 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_iso8601,
 )
 class AMPIE(InfoExtractor):
    """Shared base for extractors that read Akamai Adaptive Media Player feeds."""

    def _extract_feed_info(self, url):
        """Download the AMP JSON feed at ``url`` and turn it into an info dict.

        The feed's ``channel.item`` entry is read; media nodes are looked up
        inside ``media-group`` when present and on the item itself otherwise.

        Returns a dict with the keys ``id`` (the item's ``guid``), ``title``,
        ``description``, ``thumbnails``, ``subtitles``, ``timestamp``,
        ``duration`` and ``formats``.
        """
        item = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')['channel']['item']

        video_id = item['guid']

        # Resolved once: the group node (or the item itself when there is no
        # group) is the first place every media-* lookup goes.
        media_group = item.get('media-group') or item

        def get_media_node(name, default=None):
            # Try "media-<name>" in the group, then on the item, then fall
            # back to the bare "<name>" key on the item.
            media_name = 'media-%s' % name
            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

        thumbnails = []
        media_thumbnail = get_media_node('thumbnail')
        if media_thumbnail:
            # A single node arrives as a dict, several as a list of dicts.
            if isinstance(media_thumbnail, dict):
                media_thumbnail = [media_thumbnail]
            for thumbnail_data in media_thumbnail:
                thumbnail = thumbnail_data['@attributes']
                thumbnails.append({
                    'url': self._proto_relative_url(thumbnail['url'], 'http:'),
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        subtitles = {}
        media_subtitle = get_media_node('subTitle')
        if media_subtitle:
            if isinstance(media_subtitle, dict):
                media_subtitle = [media_subtitle]
            for subtitle_data in media_subtitle:
                subtitle = subtitle_data['@attributes']
                # Subtitles without a language attribute are filed under 'en'.
                lang = subtitle.get('lang') or 'en'
                subtitles[lang] = [{'url': subtitle['href']}]

        formats = []
        # An absent content node becomes an empty list so the loop below is
        # skipped instead of failing on None.
        media_content = get_media_node('content') or []
        if isinstance(media_content, dict):
            media_content = [media_content]
        for media_data in media_content:
            media = media_data['@attributes']
            media_type = media['type']
            if media_type == 'video/f4m':
                f4m_formats = self._extract_f4m_formats(
                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                # Any other type is treated as a directly downloadable file.
                formats.append({
                    'format_id': media_data['media-category']['@attributes']['label'],
                    'url': media['url'],
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        self._sort_formats(formats)

        # Duration is read from the first content node, when there is one.
        duration = None
        if media_content:
            duration = int_or_none(media_content[0].get('@attributes', {}).get('duration'))

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            # Previously collected above but never returned.
            'subtitles': subtitles,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': duration,
            'formats': formats,
        }
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@ -0,0 +1,106 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .amp import AMPIE
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
 )
 class BleacherReportIE(InfoExtractor):
    """Extractor for bleacherreport.com article pages.

    Article metadata comes from the Bleacher Report JSON API; the actual
    video is delegated to another extractor via a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
        'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
        'info_dict': {
            'id': '2496438',
            'ext': 'mp4',
            'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
            'uploader_id': 3992341,
            'description': 'CFB, ACC, Florida State',
            'timestamp': 1434380212,
            'upload_date': '20150615',
            'uploader': 'Team Stream Now ',
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
        'info_dict': {
            'id': '2586817',
            'ext': 'mp4',
            'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
            'timestamp': 1446839961,
            'uploader': 'Sean Fay',
            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
            'uploader_id': 6466954,
            'upload_date': '20151011',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the article's video.

        Raises ExtractorError (expected) when the article carries no video.
        """
        article_id = self._match_id(url)
        api_url = 'http://api.bleacherreport.com/api/v1/articles/%s' % article_id
        article_data = self._download_json(api_url, article_id)['article']

        # At most one thumbnail: the article's primary photo, if it has one.
        primary_photo = article_data.get('primaryPhoto')
        thumbnails = [{
            'url': primary_photo['url'],
            'width': primary_photo.get('width'),
            'height': primary_photo.get('height'),
        }] if primary_photo else []

        info = {
            '_type': 'url_transparent',
            'id': article_id,
            'title': article_data['title'],
            'uploader': article_data.get('author', {}).get('name'),
            'uploader_id': article_data.get('authorId'),
            'timestamp': parse_iso8601(article_data.get('createdAt')),
            'thumbnails': thumbnails,
            'comment_count': int_or_none(article_data.get('commentsCount')),
            'view_count': int_or_none(article_data.get('hitCount')),
        }

        video = article_data.get('video')
        if not video:
            raise ExtractorError('no video in the article', expected=True)

        # Hosts whose target URL is a fixed pattern around the video id.
        url_templates = {
            'cms.bleacherreport.com': 'http://bleacherreport.com/video_embed?id=%s',
            'ooyala.com': 'ooyala:%s',
            'vine.co': 'https://vine.co/v/%s',
        }

        video_type = video['type']
        if video_type == 'youtube.com':
            # The bare id is handed over as the URL.
            info['url'] = video['id']
        elif video_type in url_templates:
            info['url'] = url_templates[video_type] % video['id']
        else:
            # Unknown host: concatenate type and id as-is.
            info['url'] = video_type + video['id']
        return info
 class BleacherReportCMSIE(AMPIE):
    """Extractor for bleacherreport.com ``video_embed`` URLs, backed by an AMP feed."""

    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
        'md5': 'f0ca220af012d4df857b54f792c586bb',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
            'ext': 'flv',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
        },
    }]

    def _real_extract(self, url):
        """Fetch the CMS Akamai feed for the embed id and return its info dict."""
        embed_id = self._match_id(url)
        feed_url = 'http://cms.bleacherreport.com/media/items/%s/akamai.json' % embed_id
        result = self._extract_feed_info(feed_url)
        # Report the id taken from the URL, replacing whatever the feed info carries.
        result['id'] = embed_id
        return result
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import itertools
-from .common import InfoExtractor
+from .amp import AMPIE
 from ..compat import (
    compat_HTTPError,
    compat_urllib_parse,
@ -12,14 +12,11 @@ from ..compat import (
 from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
    int_or_none,
    parse_iso8601,
    sanitized_Request,
 )
-class DramaFeverBaseIE(InfoExtractor):
+class DramaFeverBaseIE(AMPIE):
    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
            'timestamp': 1404336058,
            'upload_date': '20140702',
            'duration': 343,
-        }
+        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    def _real_extract(self, url):
        video_id = self._match_id(url).replace('/', '.')
        try:
-            feed = self._download_json(
+            info = self._extract_feed_info(
-                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
+                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
                video_id, 'Downloading episode JSON')['channel']['item']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                raise ExtractorError(
                    'Currently unavailable in your country.', expected=True)
            raise
        media_group = feed.get('media-group', {})
        formats = []
        for media_content in media_group['media-content']:
            src = media_content.get('@attributes', {}).get('url')
            if not src:
                continue
            ext = determine_ext(src)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src, video_id, f4m_id='hds'))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id='hls'))
            else:
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)
        title = media_group.get('media-title')
        description = media_group.get('media-description')
        duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
        thumbnail = self._proto_relative_url(
            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
        timestamp = parse_iso8601(feed.get('pubDate'), ' ')
        subtitles = {}
        for media_subtitle in media_group.get('media-subTitle', []):
            lang = media_subtitle.get('@attributes', {}).get('lang')
            href = media_subtitle.get('@attributes', {}).get('href')
            if not lang or not href:
                continue
            subtitles[lang] = [{
                'ext': 'ttml',
                'url': href,
            }]
        series_id, episode_number = video_id.split('.')
        episode_info = self._download_json(
            # We only need a single episode info, so restricting page size to one episode
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
            if value:
                subfile = value[0].get('subfile') or value[0].get('new_subfile')
                if subfile and subfile != 'http://www.dramafever.com/st/':
-                    subtitles.setdefault('English', []).append({
+                    info['subtitiles'].setdefault('English', []).append({
                        'ext': 'srt',
                        'url': subfile,
                    })
-        return {
+        return info
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
 class DramaFeverSeriesIE(DramaFeverBaseIE):
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@ -2,14 +2,10 @@ from __future__ import unicode_literals
 import re
-from .common import InfoExtractor
+from .amp import AMPIE
 from ..utils import (
    parse_iso8601,
    int_or_none,
 )
-class FoxNewsIE(InfoExtractor):
+class FoxNewsIE(AMPIE):
    IE_DESC = 'Fox News and Fox Business Video'
    _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
    _TESTS = [
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
                'id': '3937480',
                'ext': 'flv',
                'title': 'Frozen in Time',
-                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+                'description': '16-year-old girl is size of toddler',
                'duration': 265,
-                'timestamp': 1304411491,
+                # 'timestamp': 1304411491,
-                'upload_date': '20110503',
+                # 'upload_date': '20110503',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
                'id': '3922535568001',
                'ext': 'mp4',
                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
-                'description': "Congressman discusses the president's executive action",
+                'description': "Congressman discusses president's plan",
                'duration': 292,
-                'timestamp': 1417662047,
+                # 'timestamp': 1417662047,
-                'upload_date': '20141204',
+                # 'upload_date': '20141204',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
    ]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        host, video_id = re.match(self._VALID_URL, url).groups()
        video_id = mobj.group('id')
        host = mobj.group('host')
-        video = self._download_json(
+        info = self._extract_feed_info(
-            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
+            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
-
+        info['id'] = video_id
-        item = video['channel']['item']
+        return info
        title = item['title']
        description = item['description']
        timestamp = parse_iso8601(item['dc-date'])
        media_group = item['media-group']
        duration = None
        formats = []
        for media in media_group['media-content']:
            attributes = media['@attributes']
            video_url = attributes['url']
            if video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
            elif video_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
            elif not video_url.endswith('.smil'):
                duration = int_or_none(attributes.get('duration'))
                formats.append({
                    'url': video_url,
                    'format_id': media['media-category']['@attributes']['label'],
                    'preference': 1,
                    'vbr': int_or_none(attributes.get('bitrate')),
                    'filesize': int_or_none(attributes.get('fileSize'))
                })
        self._sort_formats(formats)
        media_thumbnail = media_group['media-thumbnail']['@attributes']
        thumbnails = [{
            'url': media_thumbnail['url'],
            'width': int_or_none(media_thumbnail.get('width')),
            'height': int_or_none(media_thumbnail.get('height')),
        }] if media_thumbnail else []
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }