2014-01-29 15:34:35 +01:00
from __future__ import unicode_literals
2013-06-23 21:14:19 +02:00
import base64
import re
from . common import InfoExtractor
from . . utils import (
compat_urllib_parse ,
)
class InfoQIE(InfoExtractor):
    """Information extractor for pages on infoq.com.

    The page embeds a base64-encoded RTMP play path in a ``jsclassref``
    JavaScript assignment; the video id and container extension are taken
    from the file name at the end of that decoded path.
    """

    _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'

    # The ``u`` prefixes are dropped: the module enables ``unicode_literals``,
    # so the values are identical and the literals stay valid on every
    # Python 3 version.
    _TEST = {
        'name': 'InfoQ',
        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
        'md5': 'fcaa3d995e04080dcb9465d86b5eef62',
        'info_dict': {
            'id': '12-jan-pythonthings',
            'ext': 'mp4',
            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
            'title': 'A Few of My Favorite [Python] Things',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the InfoQ page at ``url``.

        Returns a one-element list holding a dict with ``id``, ``title``,
        ``description`` and a single RTMPE entry under ``formats``.

        Raises ``ValueError`` if the file name in the decoded play path has
        no ``.`` to separate the id from the extension.
        """
        mobj = re.match(self._VALID_URL, url)
        # Slug from the URL; only used for download/progress reporting. The
        # id that is returned comes from the play path further down.
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)
        self.report_extraction(page_id)

        video_title = self._html_search_regex(
            r'<title>(.*?)</title>', webpage, 'title')
        video_description = self._html_search_meta(
            'description', webpage, 'description')

        video_url = 'rtmpe://video.infoq.com/cfx/st/'

        # Accept optional whitespace around ``=`` so both
        # ``jsclassref='...'`` and ``jsclassref = '...'`` are matched.
        encoded_path = self._search_regex(
            r"jsclassref\s*=\s*'([^']*)'", webpage, 'jsclassref')
        # Encode to ASCII bytes first: the scraped value is text, and
        # b64decode does not accept text on every supported interpreter.
        real_path = base64.b64decode(
            encoded_path.encode('ascii')).decode('utf-8')
        playpath = 'mp4:' + real_path

        # Take the file name from the decoded path, not from ``playpath``,
        # so the ``mp4:`` prefix cannot end up in the id when the path has
        # no directory component.
        video_filename = real_path.split('/')[-1]
        # Split on the last dot only, so names containing several dots no
        # longer fail the two-value unpacking.
        video_id, extension = video_filename.rsplit('.', 1)

        return [{
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'formats': [{
                'url': video_url,
                'ext': extension,
                'play_path': playpath,
            }],
        }]