youtube-dl/youtube_dl/extractor/vshare.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    decode_packed_codes,
    ExtractorError,
)


class VShareIE(InfoExtractor):
    """Extractor for videos hosted on vshare.io.

    Both ``/d/<id>`` and ``/v/<id>`` URLs are accepted; the video id is the
    path segment that follows the ``d``/``v`` component.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Decode the text hidden in the page's packed JavaScript.

        The ``eval(function...`` script found in ``webpage`` is unpacked
        with ``decode_packed_codes``.  The unpacked script is then searched
        for a bracketed, comma-separated list of integers and for the number
        that closes the ``fromCharCode(...)`` call; that number is
        subtracted from every integer to obtain the character codes.

        Returns the decoded characters joined into a single string.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        # "number(,number)*" matches in linear time.  The previous
        # "(?:\d+,?)+" nested two quantifiers over the same digits, which
        # lets the regex engine backtrack exponentially on a long digit run
        # that is not closed by "]", and it also accepted a trailing comma
        # that made the int() conversion below fail on an empty string.
        digits = self._search_regex(
            r'\[(\d+(?:,\d+)*)\]', unpacked, 'digits')

        # The key is the same for every character, so convert it only once.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        return ''.join(
            compat_chr(int(digit) - key) for digit in digits.split(','))

    def _real_extract(self, url):
        """Build the info dict (id, title, formats) for a vshare.io URL.

        The embed page for the video id is downloaded, checked for an error
        block, and then mined for its title and for the ``<source src=...>``
        URLs contained in the decoded packed script.

        Raises ExtractorError (marked as expected) carrying the site's own
        message when the page contains an ``xxx-error`` block.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id)

        # Look for the site's error block before anything else so that a
        # failing title lookup on an error page cannot hide the real reason.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first " - ".
        title = title.split(' - ')[0]

        unpacked = self._extract_packed(webpage)
        formats = [
            {'url': video_url}
            for video_url in re.findall(r'<source src="([^"]+)', unpacked)]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

    @staticmethod
    def _extract_urls(webpage):
        """Return every vshare.io ``/v/<id>`` iframe URL found in ``webpage``.

        Scheme-relative (``//vshare.io/...``) sources are matched as well
        and are returned exactly as they appear in the markup.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`import re`

[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`from .common import InfoExtractor`
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`from ..compat import compat_chr`
[vshare] Capture and output error message 2017-11-14 16:39:54 +01:00			`from ..utils import (`
			`decode_packed_codes,`
			`ExtractorError,`
			`)`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00

			`class VShareIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'`
			`_TESTS = [{`
			`'url': 'https://vshare.io/d/0f64ce6',`
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`'md5': '17b39f55b5497ae8b59f5fbce8e35886',`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`'info_dict': {`
			`'id': '0f64ce6',`
			`'title': 'vl14062007715967',`
			`'ext': 'mp4',`
			`}`
			`}, {`
			`'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',`
			`'only_matching': True,`
			`}]`

[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`def _extract_packed(self, webpage):`
			`packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code')`
			`unpacked = decode_packed_codes(packed)`
			`digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')`
			`digits = digits.split(',')`
			`digits = [int(digit) for digit in digits]`
			`key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')`
			`chars = [compat_chr(d - int(key_digit)) for d in digits]`
			`return ''.join(chars)`

[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

			`webpage = self._download_webpage(`
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')`
			`title = title.split(' - ')[0]`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00
[vshare] Capture and output error message 2017-11-14 16:39:54 +01:00			`error = self._html_search_regex(`
			`r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,`
			`'error', default=None)`
			`if error:`
			`raise ExtractorError(error, expected=True)`

[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`unpacked = self._extract_packed(webpage)`
			`video_urls = re.findall(r'<source src="([^"]+)', unpacked)`
			`formats = [{'url': video_url} for video_url in video_urls]`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`return {`
			`'id': video_id,`
			`'title': title,`
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00			`'formats': formats,`
[vshare] Add extractor (closes #12278) 2017-04-03 22:05:18 +02:00			`}`
[vshare] Fix extraction (closes #14473) 2017-11-14 16:34:45 +01:00
			`@staticmethod`
			`def _extract_urls(webpage):`
			`return re.findall(`
			`r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',`
			`webpage)`