2014-11-24 20:02:00 +01:00
# coding: utf-8
from __future__ import unicode_literals
2016-01-31 22:00:09 +01:00
import re
2014-11-24 20:02:00 +01:00
from . common import InfoExtractor
2016-01-31 22:00:09 +01:00
from . . utils import (
js_to_json ,
determine_ext ,
)
2014-11-24 20:02:00 +01:00
2014-11-24 22:47:23 +01:00
2014-11-24 20:02:00 +01:00
class BpbIE(InfoExtractor):
    """Extractor for video pages in the bpb.de Mediathek.

    Handles URLs of the form ``http(s)://[www.]bpb.de/mediathek/<id>/...``
    where ``<id>`` is numeric (see ``_VALID_URL``).
    """

    IE_DESC = 'Bundeszentrale für politische Bildung'
    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'

    _TEST = {
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
        # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
        'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single Mediathek page.

        Downloads the page at ``url``, takes the title from the
        ``<h2 class="white">`` heading and collects one format per
        JavaScript object literal whose ``src`` points at film.bpb.de.

        Returns a dict with the keys ``id``, ``formats``, ``title`` and
        ``description``.
        """
        # NOTE(review): _match_id presumably yields the ``id`` group of
        # _VALID_URL -- it is defined on the base class, not in this file.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', webpage, 'title')

        # Each match is a whole ``{ src: '...', ... }`` literal taken from the
        # page source.  Both http and https sources are accepted so that a
        # scheme change on the server does not silently drop every format.
        video_info_dicts = re.findall(
            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)

        formats = []
        for raw_info in video_info_dicts:
            # The literal is JavaScript, not JSON, hence the js_to_json pass.
            video_info = self._parse_json(
                raw_info, video_id, transform_source=js_to_json)
            video_url = video_info['src']
            # File names containing "_high." are labelled high quality;
            # everything else is labelled low.
            quality = 'high' if re.search(r'_high\.', video_url) else 'low'
            formats.append({
                'url': video_url,
                'preference': 10 if quality == 'high' else 0,
                'format_note': quality,
                'format_id': '%s-%s' % (quality, determine_ext(video_url)),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
        }