update sources

Adrien Beudin 2015-02-17 12:45:54 +01:00
parent 2357ffbf73
commit d265aca41c
745 changed files with 3627 additions and 52651 deletions


@ -33,3 +33,4 @@ generally made searx better:
- Benjamin Sonntag
- @opi
- @dimqua
- Giorgos Logiotatidis

sources/Dockerfile Normal file

@ -0,0 +1,21 @@
FROM debian:stable
RUN apt-get update && \
apt-get install -y --no-install-recommends \
python-dev python2.7-minimal python-virtualenv \
python-pybabel python-pip zlib1g-dev \
libxml2-dev libxslt1-dev build-essential \
openssl
RUN useradd searx
WORKDIR /app
RUN pip install uwsgi
COPY requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt
COPY . /app
RUN sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml
EXPOSE 5000
CMD ["/usr/local/bin/uwsgi", "--uid", "searx", "--gid", "searx", "--http", ":5000", "-w", "searx.webapp"]
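The sed line above swaps the placeholder 'ultrasecretkey' in searx/settings.yml for a random hex string at image build time. A rough Python equivalent of just that step, for illustration only (the helper name is hypothetical; the Dockerfile itself relies on sed and openssl):

import secrets

def randomize_secret_key(path='searx/settings.yml'):
    # replace the shipped placeholder with a fresh 16-byte hex value,
    # mirroring `sed` + `openssl rand -hex 16` above
    with open(path) as f:
        text = f.read()
    with open(path, 'w') as f:
        f.write(text.replace('ultrasecretkey', secrets.token_hex(16)))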


@ -46,7 +46,9 @@ minimal: bin/buildout minimal.cfg setup.py
styles:
	@lessc -x searx/static/themes/default/less/style.less > searx/static/themes/default/css/style.css
	@lessc -x searx/static/themes/default/less/style-rtl.less > searx/static/themes/default/css/style-rtl.css
	@lessc -x searx/static/themes/courgette/less/style.less > searx/static/themes/courgette/css/style.css
	@lessc -x searx/static/themes/courgette/less/style-rtl.less > searx/static/themes/courgette/css/style-rtl.css
	@lessc -x searx/static/less/bootstrap/bootstrap.less > searx/static/css/bootstrap.min.css
	@lessc -x searx/static/themes/oscar/less/oscar/oscar.less > searx/static/themes/oscar/css/oscar.min.css


@ -1,61 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see <http://www.gnu.org/licenses/>.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import logging
from os import environ
from os.path import realpath, dirname, join, abspath
try:
from yaml import load
except:
from sys import exit, stderr
stderr.write('[E] install pyyaml\n')
exit(2)
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
# if possible set path to settings using the
# environment variable SEARX_SETTINGS_PATH
if 'SEARX_SETTINGS_PATH' in environ:
settings_path = environ['SEARX_SETTINGS_PATH']
# otherwise using default path
else:
settings_path = join(searx_dir, 'settings.yml')
if 'SEARX_HTTPS_REWRITE_PATH' in environ:
https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH']
else:
https_rewrite_path = join(searx_dir, 'https_rules')
# load settings
with open(settings_path) as settings_yaml:
settings = load(settings_yaml)
if settings.get('server', {}).get('debug'):
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger('searx')
# load https rules only if https rewrite is enabled
if settings.get('server', {}).get('https_rewrite'):
# load https rules
from searx.https_rewrite import load_https_rules
load_https_rules(https_rewrite_path)
logger.info('Initialisation done')
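Because the settings path is resolved at import time, an alternative settings file can be selected through the environment variable read above. A minimal sketch, assuming a valid settings.yml exists at the chosen path:

import os
os.environ['SEARX_SETTINGS_PATH'] = '/etc/searx/settings.yml'  # hypothetical path

import searx  # settings.yml is loaded and logging configured during this import
print(searx.settings.get('server', {}).get('debug'))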


@ -1,162 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see <http://www.gnu.org/licenses/>.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
from lxml import etree
from json import loads
from urllib import urlencode
from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
from searx.poolrequests import get
def searx_bang(full_query):
'''check if the search query contains a bang, and create fitting autocompleter results'''
# check if there is a query which can be parsed
if len(full_query.getSearchQuery()) == 0:
return []
results = []
# check if current query starts with !bang
first_char = full_query.getSearchQuery()[0]
if first_char == '!' or first_char == '?':
if len(full_query.getSearchQuery()) == 1:
# show some example queries
# TODO, check if engine is not available
results.append(first_char + "images")
results.append(first_char + "wikipedia")
results.append(first_char + "osm")
else:
engine_query = full_query.getSearchQuery()[1:]
# check if query starts with category name
for categorie in categories:
if categorie.startswith(engine_query):
results.append(first_char+'{categorie}'.format(categorie=categorie))
# check if query starts with engine name
for engine in engines:
if engine.startswith(engine_query.replace('_', ' ')):
results.append(first_char+'{engine}'.format(engine=engine.replace(' ', '_')))
# check if query starts with engine shortcut
for engine_shortcut in engine_shortcuts:
if engine_shortcut.startswith(engine_query):
results.append(first_char+'{engine_shortcut}'.format(engine_shortcut=engine_shortcut))
# check if current query starts with :bang
elif first_char == ':':
if len(full_query.getSearchQuery()) == 1:
# show some example queries
results.append(":en")
results.append(":en_us")
results.append(":english")
results.append(":united_kingdom")
else:
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
else:
results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
if lang_name.startswith(engine_query):
results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country
if country.startswith(engine_query.replace('_', ' ')):
results.append(':{country}'.format(country=country.replace(' ', '_')))
# remove duplicates
result_set = set(results)
# remove results which are already contained in the query
for query_part in full_query.query_parts:
if query_part in result_set:
result_set.remove(query_part)
# convert result_set back to list
return list(result_set)
def dbpedia(query):
# dbpedia autocompleter
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' # noqa
response = get(autocomplete_url
+ urlencode(dict(QueryString=query)))
results = []
if response.ok:
dom = etree.fromstring(response.content)
results = dom.xpath('//a:Result/a:Label//text()',
namespaces={'a': 'http://lookup.dbpedia.org/'})
return results
def duckduckgo(query):
# duckduckgo autocompleter
url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
resp = loads(get(url.format(urlencode(dict(q=query)))).text)
if len(resp) > 1:
return resp[1]
return []
def google(query):
# google autocompleter
autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
response = get(autocomplete_url
+ urlencode(dict(q=query)))
results = []
if response.ok:
dom = etree.fromstring(response.text)
results = dom.xpath('//suggestion/@data')
return results
def wikipedia(query):
# wikipedia autocompleter
url = 'https://en.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' # noqa
resp = loads(get(url.format(urlencode(dict(search=query)))).text)
if len(resp) > 1:
return resp[1]
return []
backends = {'dbpedia': dbpedia,
'duckduckgo': duckduckgo,
'google': google,
'wikipedia': wikipedia
}
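Each backend above maps a plain query string to a list of suggestion strings. A minimal usage sketch, assuming network access and that the module is importable as searx.autocomplete:

from searx.autocomplete import backends

suggestions = backends['duckduckgo']('searx')  # e.g. ['searx', 'searx engine', ...]
print(suggestions[:3])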


@ -1,210 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see <http://www.gnu.org/licenses/>.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
from os.path import realpath, dirname, splitext, join
import sys
from imp import load_source
from flask.ext.babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger
logger = logger.getChild('engines')
engine_dir = dirname(realpath(__file__))
engines = {}
categories = {'general': []}
engine_shortcuts = {}
def load_module(filename):
modname = splitext(filename)[0]
if modname in sys.modules:
del sys.modules[modname]
filepath = join(engine_dir, filename)
module = load_source(modname, filepath)
module.name = modname
return module
def load_engine(engine_data):
engine_name = engine_data['engine']
engine = load_module(engine_name + '.py')
for param_name in engine_data:
if param_name == 'engine':
continue
if param_name == 'categories':
if engine_data['categories'] == 'none':
engine.categories = []
else:
engine.categories = map(
str.strip, engine_data['categories'].split(','))
continue
setattr(engine, param_name, engine_data[param_name])
if not hasattr(engine, 'paging'):
engine.paging = False
if not hasattr(engine, 'categories'):
engine.categories = ['general']
if not hasattr(engine, 'language_support'):
engine.language_support = True
if not hasattr(engine, 'timeout'):
engine.timeout = settings['server']['request_timeout']
if not hasattr(engine, 'shortcut'):
engine.shortcut = ''
if not hasattr(engine, 'disabled'):
engine.disabled = False
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
continue
if getattr(engine, engine_attr) is None:
logger.error('Missing engine config attribute: "{0}.{1}"'
.format(engine.name, engine_attr))
sys.exit(1)
engine.stats = {
'result_count': 0,
'search_count': 0,
'page_load_time': 0,
'score_count': 0,
'errors': 0
}
if hasattr(engine, 'categories'):
for category_name in engine.categories:
categories.setdefault(category_name, []).append(engine)
else:
categories['general'].append(engine)
if engine.shortcut:
if engine.shortcut in engine_shortcuts:
logger.error('Engine config error: ambiguous shortcut: {0}'
.format(engine.shortcut))
sys.exit(1)
engine_shortcuts[engine.shortcut] = engine.name
return engine
def get_engines_stats():
# TODO refactor
pageloads = []
results = []
scores = []
errors = []
scores_per_result = []
max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
for engine in engines.values():
if engine.stats['search_count'] == 0:
continue
results_num = \
engine.stats['result_count'] / float(engine.stats['search_count'])
load_times = engine.stats['page_load_time'] / float(engine.stats['search_count']) # noqa
if results_num:
score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
score_per_result = score / results_num
else:
score = score_per_result = 0.0
max_results = max(results_num, max_results)
max_pageload = max(load_times, max_pageload)
max_score = max(score, max_score)
max_score_per_result = max(score_per_result, max_score_per_result)
max_errors = max(max_errors, engine.stats['errors'])
pageloads.append({'avg': load_times, 'name': engine.name})
results.append({'avg': results_num, 'name': engine.name})
scores.append({'avg': score, 'name': engine.name})
errors.append({'avg': engine.stats['errors'], 'name': engine.name})
scores_per_result.append({
'avg': score_per_result,
'name': engine.name
})
for engine in pageloads:
if max_pageload:
engine['percentage'] = int(engine['avg'] / max_pageload * 100)
else:
engine['percentage'] = 0
for engine in results:
if max_results:
engine['percentage'] = int(engine['avg'] / max_results * 100)
else:
engine['percentage'] = 0
for engine in scores:
if max_score:
engine['percentage'] = int(engine['avg'] / max_score * 100)
else:
engine['percentage'] = 0
for engine in scores_per_result:
if max_score_per_result:
engine['percentage'] = int(engine['avg']
/ max_score_per_result * 100)
else:
engine['percentage'] = 0
for engine in errors:
if max_errors:
engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
else:
engine['percentage'] = 0
return [
(
gettext('Page loads (sec)'),
sorted(pageloads, key=itemgetter('avg'))
),
(
gettext('Number of results'),
sorted(results, key=itemgetter('avg'), reverse=True)
),
(
gettext('Scores'),
sorted(scores, key=itemgetter('avg'), reverse=True)
),
(
gettext('Scores per result'),
sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
),
(
gettext('Errors'),
sorted(errors, key=itemgetter('avg'), reverse=True)
),
]
if 'engines' not in settings or not settings['engines']:
logger.error('No engines found. Edit your settings.yml')
exit(2)
for engine_data in settings['engines']:
engine = load_engine(engine_data)
engines[engine.name] = engine
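For reference, load_engine() consumes one entry of settings['engines']. A minimal entry, written here as the dict the YAML parser would produce (values are illustrative; the 'engine' name must match a file in searx/engines/):

engine_data = {
    'name': 'duckduckgo',
    'engine': 'duckduckgo',   # loads searx/engines/duckduckgo.py
    'shortcut': 'ddg',
    'categories': 'general',
}
engine = load_engine(engine_data)
# load_engine() fills in missing defaults (paging, timeout, an empty stats dict)
# and registers the engine under its categories and its shortcut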


@ -1,84 +0,0 @@
## Bing (Web)
#
# @website https://www.bing.com
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
# max. 5000 query/month
#
# @using-api no (because of query limit)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo publishedDate
from urllib import urlencode
from cgi import escape
from lxml import html
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
paging = True
language_support = True
# search-url
base_url = 'https://www.bing.com/'
search_string = 'search?{query}&first={offset}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
search_path = search_string.format(
query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
params['cookies']['SRCHHPGUSR'] = \
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
params['url'] = base_url + search_path
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.content)
# parse results
for result in dom.xpath('//div[@class="sa_cc"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
title = extract_text(link)
content = escape(extract_text(result.xpath('.//p')))
# append result
results.append({'url': url,
'title': title,
'content': content})
# return results if something is found
if results:
return results
# parse results again if nothing is found yet
for result in dom.xpath('//li[@class="b_algo"]'):
link = result.xpath('.//h2/a')[0]
url = link.attrib.get('href')
title = extract_text(link)
content = escape(extract_text(result.xpath('.//p')))
# append result
results.append({'url': url,
'title': title,
'content': content})
# return results
return results
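The request()/response() pair above follows the common searx engine contract: request() only fills in the outgoing params (URL, cookies), and response() turns the fetched page into result dicts. A simplified, hypothetical driver to show the flow (searx's real search loop uses its own request pool and error handling):

import requests

params = {'pageno': 2, 'language': 'fr_FR', 'cookies': {}, 'headers': {}}
params = request('free software', params)           # builds the Bing URL for page 2
resp = requests.get(params['url'], cookies=params['cookies'])
for r in response(resp):                            # list of {'url', 'title', 'content'}
    print(r['title'], r['url'])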


@ -1,96 +0,0 @@
## Bing (Images)
#
# @website https://www.bing.com/images
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
# max. 5000 query/month
#
# @using-api no (because of query limit)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, img_src
#
# @todo currently up to 35 images are received per page,
# because bing does not respect count=10;
# the response is therefore limited to 10 images below
from urllib import urlencode
from lxml import html
from yaml import load
import re
# engine dependent config
categories = ['images']
paging = True
safesearch = True
# search-url
base_url = 'https://www.bing.com/'
search_string = 'images/search?{query}&count=10&first={offset}'
thumb_url = "http://ts1.mm.bing.net/th?id={ihk}"
# safesearch definitions
safesearch_types = {2: 'STRICT',
1: 'DEMOTE',
0: 'OFF'}
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
# required for cookie
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
search_path = search_string.format(
query=urlencode({'q': query}),
offset=offset)
params['cookies']['SRCHHPGUSR'] = \
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
'&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
params['url'] = base_url + search_path
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.content)
# init regex for yaml-parsing
p = re.compile('({|,)([a-z]+):(")')
# parse results
for result in dom.xpath('//div[@class="dg_u"]'):
link = result.xpath('./a')[0]
# parse yaml-data (a space has to be added after each colon to make it parsable)
yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m')))
title = link.attrib.get('t1')
ihk = link.attrib.get('ihk')
#url = 'http://' + link.attrib.get('t3')
url = yaml_data.get('surl')
img_src = yaml_data.get('imgurl')
# append result
results.append({'template': 'images.html',
'url': url,
'title': title,
'content': '',
'thumbnail_src': thumb_url.format(ihk=ihk),
'img_src': img_src})
# TODO stop parsing if 10 images are found
if len(results) >= 10:
break
# return results
return results
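The regex substitution above exists because the 'm' attribute holds JSON-ish data without spaces after the colons; roughly, with an illustrative attribute value:

import re
from yaml import load

p = re.compile('({|,)([a-z]+):(")')
raw = '{surl:"http://example.org/page",imgurl:"http://example.org/img.jpg"}'  # illustrative
fixed = p.sub(r'\1\2: \3', raw)   # '{surl: "http://...",imgurl: "http://..."}'
data = load(fixed)                # {'surl': 'http://...', 'imgurl': 'http://...'}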


@ -1,98 +0,0 @@
## Bing (News)
#
# @website https://www.bing.com/news
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
# max. 5000 query/month
#
# @using-api no (because of query limit)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate
from urllib import urlencode
from cgi import escape
from lxml import html
from datetime import datetime, timedelta
from dateutil import parser
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['news']
paging = True
language_support = True
# search-url
base_url = 'https://www.bing.com/'
search_string = 'news/search?{query}&first={offset}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
search_path = search_string.format(
query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
params['cookies']['_FP'] = "ui=en-US"
params['url'] = base_url + search_path
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.content)
# parse results
for result in dom.xpath('//div[@class="sn_r"]'):
link = result.xpath('.//div[@class="newstitle"]/a')[0]
url = link.attrib.get('href')
title = extract_text(link)
contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]')
content = escape(extract_text(contentXPath))
# parse publishedDate
publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
'//span[contains(@class,"sn_ST")]'
'//span[contains(@class,"sn_tm")]')
publishedDate = escape(extract_text(publishedDateXPath))
if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate)
publishedDate = datetime.now() - timedelta(minutes=int(timeNumbers[0]))
elif re.match("^[0-9]+ hour(s|) ago$", publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate)
publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0]))
elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate)
publishedDate = datetime.now()\
- timedelta(hours=int(timeNumbers[0]))\
- timedelta(minutes=int(timeNumbers[1]))
elif re.match("^[0-9]+ day(s|) ago$", publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate)
publishedDate = datetime.now() - timedelta(days=int(timeNumbers[0]))
else:
try:
publishedDate = parser.parse(publishedDate, dayfirst=False)
except TypeError:
publishedDate = datetime.now()
# append result
results.append({'url': url,
'title': title,
'publishedDate': publishedDate,
'content': content})
# return results
return results
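The branch chain above normalises Bing's relative timestamps; informally, the mapping it implements:

#   '5 minutes ago'           -> datetime.now() - timedelta(minutes=5)
#   '2 hours ago'             -> datetime.now() - timedelta(hours=2)
#   '1 hour, 30 minutes ago'  -> datetime.now() - timedelta(hours=1, minutes=30)
#   '3 days ago'              -> datetime.now() - timedelta(days=3)
#   any other string          -> dateutil.parser.parse(), or datetime.now() on TypeError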


@ -1,104 +0,0 @@
## BTDigg (Videos, Music, Files)
#
# @website https://btdigg.org
# @provide-api yes (on demand)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
# search-url
url = 'https://btdigg.org'
search_url = url + '/search?q={search_term}&p={pageno}'
# do search-request
def request(query, params):
params['url'] = search_url.format(search_term=quote(query),
pageno=params['pageno']-1)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
# return empty array if nothing is found
if not search_res:
return []
# parse results
for result in search_res:
link = result.xpath('.//td[@class="torrent_name"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = escape(extract_text(link))
content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
content = "<br />".join(content.split("\n"))
filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
files = result.xpath('.//span[@class="attr_val"]/text()')[1]
seed = result.xpath('.//span[@class="attr_val"]/text()')[2]
# convert seed to int if possible
if seed.isdigit():
seed = int(seed)
else:
seed = 0
leech = 0
# convert filesize to byte if possible
try:
filesize = float(filesize)
# convert filesize to byte
if filesize_multiplier == 'TB':
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
elif filesize_multiplier == 'GB':
filesize = int(filesize * 1024 * 1024 * 1024)
elif filesize_multiplier == 'MB':
filesize = int(filesize * 1024 * 1024)
elif filesize_multiplier == 'KB':
filesize = int(filesize * 1024)
except:
filesize = None
# convert files to int if possible
if files.isdigit():
files = int(files)
else:
files = None
magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']
# append result
results.append({'url': href,
'title': title,
'content': content,
'seed': seed,
'leech': leech,
'filesize': filesize,
'files': files,
'magnetlink': magnetlink,
'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True)
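The unit handling in the try block above amounts to a power-of-1024 conversion; the same logic as a standalone helper (hypothetical name, for illustration):

def to_bytes(value, unit):
    # '1.5', 'GB' -> 1610612736; unknown units fall back to the raw number
    multipliers = {'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
    return int(float(value) * multipliers.get(unit, 1))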


@ -1,57 +0,0 @@
from datetime import datetime
import re
categories = []
url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) # noqa
def request(query, params):
m = parser_re.match(query)
if not m:
# wrong query
return params
ammount, from_currency, to_currency = m.groups()
ammount = float(ammount)
q = (from_currency + to_currency).upper()
params['url'] = url.format(query=q)
params['ammount'] = ammount
params['from'] = from_currency
params['to'] = to_currency
return params
def response(resp):
results = []
try:
_, conversion_rate, _ = resp.text.split(',', 2)
conversion_rate = float(conversion_rate)
except:
return results
answer = '{0} {1} = {2} {3} (1 {1} = {4} {3})'.format(
resp.search_params['ammount'],
resp.search_params['from'],
resp.search_params['ammount'] * conversion_rate,
resp.search_params['to'],
conversion_rate
)
now_date = datetime.now().strftime('%Y%m%d')
url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa
url = url.format(
now_date,
resp.search_params['ammount'],
resp.search_params['from'].lower(),
resp.search_params['to'].lower()
)
results.append({'answer': answer, 'url': url})
return results
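A worked example of what parser_re and the answer template above produce (using parser_re defined above; the conversion rate itself comes from the Yahoo CSV response, 1.25 is an illustrative value):

m = parser_re.match('100 eur in usd')
# m.groups() == ('100', 'eur', 'usd')
# with conversion_rate = 1.25 the answer string becomes
# '100.0 eur = 125.0 usd (1 eur = 1.25 usd)'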


@ -1,72 +0,0 @@
## Dailymotion (Videos)
#
# @website https://www.dailymotion.com
# @provide-api yes (http://www.dailymotion.com/developer)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, thumbnail, publishedDate, embedded
#
# @todo set content-parameter with correct data
from urllib import urlencode
from json import loads
from cgi import escape
from datetime import datetime
# engine dependent config
categories = ['videos']
paging = True
language_support = True
# search-url
# see http://www.dailymotion.com/doc/api/obj-video.html
search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
# do search-request
def request(query, params):
if params['language'] == 'all':
locale = 'en-US'
else:
locale = params['language']
params['url'] = search_url.format(
query=urlencode({'search': query, 'localization': locale}),
pageno=params['pageno'])
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# return empty array if there are no results
if not 'list' in search_res:
return []
# parse results
for res in search_res['list']:
title = res['title']
url = res['url']
content = escape(res['description'])
thumbnail = res['thumbnail_360_url']
publishedDate = datetime.fromtimestamp(res['created_time'], None)
embedded = embedded_url.format(videoid=res['id'])
results.append({'template': 'videos.html',
'url': url,
'title': title,
'content': content,
'publishedDate': publishedDate,
'embedded': embedded,
'thumbnail': thumbnail})
# return results
return results


@ -1,61 +0,0 @@
## Deezer (Music)
#
# @website https://deezer.com
# @provide-api yes (http://developers.deezer.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, embedded
from json import loads
from urllib import urlencode
# engine dependent config
categories = ['music']
paging = True
# search-url
url = 'http://api.deezer.com/'
search_url = url + 'search?{query}&index={offset}'
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
'data-src="http://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\
'width="540" height="80"></iframe>'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 25
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# parse results
for result in search_res.get('data', []):
if result['type'] == 'track':
title = result['title']
url = result['link']
content = result['artist']['name'] +\
" &bull; " +\
result['album']['title'] +\
" &bull; " + result['title']
embedded = embedded_url.format(audioid=result['id'])
# append result
results.append({'url': url,
'title': title,
'embedded': embedded,
'content': content})
# return results
return results


@ -1,67 +0,0 @@
## Deviantart (Images)
#
# @website https://www.deviantart.com/
# @provide-api yes (https://www.deviantart.com/developers/) (RSS)
#
# @using-api no (TODO, rewrite to api)
# @results HTML
# @stable no (HTML can change)
# @parse url, title, thumbnail_src, img_src
#
# @todo rewrite to api
from urllib import urlencode
from urlparse import urljoin
from lxml import html
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['images']
paging = True
# search-url
base_url = 'https://www.deviantart.com/'
search_url = base_url+'search?offset={offset}&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 24
params['url'] = search_url.format(offset=offset,
query=urlencode({'q': query}))
return params
# get response from search-request
def response(resp):
results = []
# return empty array if a redirection code is returned
if resp.status_code == 302:
return []
dom = html.fromstring(resp.text)
regex = re.compile('\/200H\/')
# parse results
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
link = result.xpath('.//a[contains(@class, "thumb")]')[0]
url = urljoin(base_url, link.attrib.get('href'))
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
title = extract_text(title_links[0])
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
img_src = regex.sub('/', thumbnail_src)
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'thumbnail_src': thumbnail_src,
'template': 'images.html'})
# return results
return results


@ -1,70 +0,0 @@
## Digg (News, Social media)
#
# @website https://digg.com/
# @provide-api no
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate, thumbnail
from urllib import quote_plus
from json import loads
from lxml import html
from cgi import escape
from dateutil import parser
# engine dependent config
categories = ['news', 'social media']
paging = True
# search-url
base_url = 'https://digg.com/'
search_url = base_url+'api/search/{query}.json?position={position}&format=html'
# specific xpath variables
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = search_url.format(position=offset,
query=quote_plus(query))
return params
# get response from search-request
def response(resp):
results = []
search_result = loads(resp.text)
if 'html' not in search_result or search_result['html'] == '':
return results
dom = html.fromstring(search_result['html'])
# parse results
for result in dom.xpath(results_xpath):
url = result.attrib.get('data-contenturl')
thumbnail = result.xpath('.//img')[0].attrib.get('src')
title = ''.join(result.xpath(title_xpath))
content = escape(''.join(result.xpath(content_xpath)))
pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
publishedDate = parser.parse(pubdate)
# append result
results.append({'url': url,
'title': title,
'content': content,
'template': 'videos.html',
'publishedDate': publishedDate,
'thumbnail': thumbnail})
# return results
return results


@ -1,76 +0,0 @@
## DuckDuckGo (Web)
#
# @website https://duckduckgo.com/
# @provide-api yes (https://duckduckgo.com/api),
# but not all results from search-site
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo rewrite to api
# @todo language support
# (the current used site does not support language-change)
from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
paging = True
language_support = True
# search-url
url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]'
content_xpath = './/div[@class="snippet"]'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 30
if params['language'] == 'all':
locale = 'en-us'
else:
locale = params['language'].replace('_', '-').lower()
params['url'] = url.format(
query=urlencode({'q': query, 'kl': locale}),
offset=offset)
return params
# get response from search-request
def response(resp):
results = []
doc = fromstring(resp.text)
# parse results
for r in doc.xpath(result_xpath):
try:
res_url = r.xpath(url_xpath)[-1]
except:
continue
if not res_url:
continue
title = extract_text(r.xpath(title_xpath))
content = extract_text(r.xpath(content_xpath))
# append result
results.append({'title': title,
'content': content,
'url': res_url})
# return results
return results


@ -1,149 +0,0 @@
import json
from urllib import urlencode
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
def result_to_text(url, text, htmlResult):
# TODO : remove result ending with "Meaning" or "Category"
dom = html.fromstring(htmlResult)
a = dom.xpath('//a')
if len(a) >= 1:
return extract_text(a[0])
else:
return text
def request(query, params):
# TODO add kl={locale}
params['url'] = url.format(query=urlencode({'q': query}))
return params
def response(resp):
results = []
search_res = json.loads(resp.text)
content = ''
heading = search_res.get('Heading', '')
attributes = []
urls = []
infobox_id = None
relatedTopics = []
# add answer if there is one
answer = search_res.get('Answer', '')
if answer != '':
results.append({'answer': html_to_text(answer)})
# add infobox
if 'Definition' in search_res:
content = content + search_res.get('Definition', '')
if 'Abstract' in search_res:
content = content + search_res.get('Abstract', '')
# image
image = search_res.get('Image', '')
image = None if image == '' else image
# attributes
if 'Infobox' in search_res:
infobox = search_res.get('Infobox', None)
if 'content' in infobox:
for info in infobox.get('content'):
attributes.append({'label': info.get('label'),
'value': info.get('value')})
# urls
for ddg_result in search_res.get('Results', []):
if 'FirstURL' in ddg_result:
firstURL = ddg_result.get('FirstURL', '')
text = ddg_result.get('Text', '')
urls.append({'title': text, 'url': firstURL})
results.append({'title': heading, 'url': firstURL})
# related topics
for ddg_result in search_res.get('RelatedTopics', []):
if 'FirstURL' in ddg_result:
suggestion = result_to_text(ddg_result.get('FirstURL', None),
ddg_result.get('Text', None),
ddg_result.get('Result', None))
if suggestion != heading:
results.append({'suggestion': suggestion})
elif 'Topics' in ddg_result:
suggestions = []
relatedTopics.append({'name': ddg_result.get('Name', ''),
'suggestions': suggestions})
for topic_result in ddg_result.get('Topics', []):
suggestion = result_to_text(topic_result.get('FirstURL', None),
topic_result.get('Text', None),
topic_result.get('Result', None))
if suggestion != heading:
suggestions.append(suggestion)
# abstract
abstractURL = search_res.get('AbstractURL', '')
if abstractURL != '':
# add as result ? problem always in english
infobox_id = abstractURL
urls.append({'title': search_res.get('AbstractSource'),
'url': abstractURL})
# definition
definitionURL = search_res.get('DefinitionURL', '')
if definitionURL != '':
# add as result ? as answer ? problem always in english
infobox_id = definitionURL
urls.append({'title': search_res.get('DefinitionSource'),
'url': definitionURL})
# entity
entity = search_res.get('Entity', None)
# TODO continent / country / department / location / waterfall /
# mountain range :
# link to map search, get weather, near by locations
# TODO musician : link to music search
# TODO concert tour : ??
# TODO film / actor / television / media franchise :
# links to IMDB / rottentomatoes (or scrape result)
# TODO music : link to musicbrainz / last.fm
# TODO book : ??
# TODO artist / playwright : ??
# TODO company : ??
# TODO software / os : ??
# TODO software engineer : ??
# TODO prepared food : ??
# TODO website : ??
# TODO performing art : ??
# TODO prepared food : ??
# TODO programming language : ??
# TODO file format : ??
if len(heading) > 0:
# TODO get infobox.meta.value where .label='article_title'
if image is None and len(attributes) == 0 and len(urls) == 1 and\
len(relatedTopics) == 0 and len(content) == 0:
results.append({
'url': urls[0]['url'],
'title': heading,
'content': content
})
else:
results.append({
'infobox': heading,
'id': infobox_id,
'entity': entity,
'content': content,
'img_src': image,
'attributes': attributes,
'urls': urls,
'relatedTopics': relatedTopics
})
return results


@ -1,14 +0,0 @@
## Dummy
#
# @results empty array
# @stable yes
# do search-request
def request(query, params):
return params
# get response from search-request
def response(resp):
return []


@ -1,114 +0,0 @@
## Faroo (Web, News)
#
# @website http://www.faroo.com
# @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, img_src
from urllib import urlencode
from json import loads
import datetime
from searx.utils import searx_useragent
# engine dependent config
categories = ['general', 'news']
paging = True
language_support = True
number_of_results = 10
api_key = None
# search-url
url = 'http://www.faroo.com/'
search_url = url + 'api?{query}'\
'&start={offset}'\
'&length={number_of_results}'\
'&l={language}'\
'&src={categorie}'\
'&i=false'\
'&f=json'\
'&key={api_key}' # noqa
search_category = {'general': 'web',
'news': 'news'}
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('_')[0]
# if language is not supported, put it in english
if language != 'en' and\
language != 'de' and\
language != 'zh':
language = 'en'
params['url'] = search_url.format(offset=offset,
number_of_results=number_of_results,
query=urlencode({'q': query}),
language=language,
categorie=categorie,
api_key=api_key)
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
return params
# get response from search-request
def response(resp):
# HTTP-Code 401: api-key is not valid
if resp.status_code == 401:
raise Exception("API key is not valid")
# HTTP-Code 429: rate limit exceeded
if resp.status_code == 429:
raise Exception("rate limit has been exceeded!")
results = []
search_res = loads(resp.text)
# return empty array if there are no results
if not search_res.get('results', {}):
return []
# parse results
for result in search_res['results']:
if result['news']:
# timestamp (milliseconds since 1970)
publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0) # noqa
# append news result
results.append({'url': result['url'],
'title': result['title'],
'publishedDate': publishedDate,
'content': result['kwic']})
else:
# append general result
# TODO, publishedDate correct?
results.append({'url': result['url'],
'title': result['title'],
'content': result['kwic']})
# append image result if image url is set
# TODO, show results with an image like in faroo
if result['iurl']:
results.append({'template': 'images.html',
'url': result['url'],
'title': result['title'],
'content': result['kwic'],
'img_src': result['iurl']})
# return results
return results


@ -1,84 +0,0 @@
from urllib import urlencode
from HTMLParser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
paging = True
class FilecropResultParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.__start_processing = False
self.results = []
self.result = {}
self.tr_counter = 0
self.data_counter = 0
def handle_starttag(self, tag, attrs):
if tag == 'tr':
if ('bgcolor', '#edeff5') in attrs or\
('bgcolor', '#ffffff') in attrs:
self.__start_processing = True
if not self.__start_processing:
return
if tag == 'label':
self.result['title'] = [attr[1] for attr in attrs
if attr[0] == 'title'][0]
elif tag == 'a' and ('rel', 'nofollow') in attrs\
and ('class', 'sourcelink') in attrs:
if 'content' in self.result:
self.result['content'] += [attr[1] for attr in attrs
if attr[0] == 'title'][0]
else:
self.result['content'] = [attr[1] for attr in attrs
if attr[0] == 'title'][0]
self.result['content'] += ' '
elif tag == 'a':
self.result['url'] = url + [attr[1] for attr in attrs
if attr[0] == 'href'][0]
def handle_endtag(self, tag):
if self.__start_processing is False:
return
if tag == 'tr':
self.tr_counter += 1
if self.tr_counter == 2:
self.__start_processing = False
self.tr_counter = 0
self.data_counter = 0
self.results.append(self.result)
self.result = {}
def handle_data(self, data):
if not self.__start_processing:
return
if 'content' in self.result:
self.result['content'] += data + ' '
else:
self.result['content'] = data + ' '
self.data_counter += 1
def request(query, params):
index = 1 + (params['pageno'] - 1) * 30
params['url'] = search_url.format(query=urlencode({'w': query}),
index=index)
return params
def response(resp):
parser = FilecropResultParser()
parser.feed(resp.text)
return parser.results


@ -1,96 +0,0 @@
#!/usr/bin/env python
## Flickr (Images)
#
# @website https://www.flickr.com
# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, thumbnail, img_src
#More info on api-key : https://www.flickr.com/services/apps/create/
from urllib import urlencode
from json import loads
categories = ['images']
nb_per_page = 15
paging = True
api_key = None
url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
'&api_key={api_key}&{text}&sort=relevance' +\
'&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\
'&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
paging = True
def build_flickr_url(user_id, photo_id):
return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params):
params['url'] = url.format(text=urlencode({'text': query}),
api_key=api_key,
nb_per_page=nb_per_page,
page=params['pageno'])
return params
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if not 'photos' in search_results:
return []
if not 'photo' in search_results['photos']:
return []
photos = search_results['photos']['photo']
# parse results
for photo in photos:
if 'url_o' in photo:
img_src = photo['url_o']
elif 'url_z' in photo:
img_src = photo['url_z']
else:
continue
# For a bigger thumbnail, keep only the url_z, not the url_n
if 'url_n' in photo:
thumbnail_src = photo['url_n']
elif 'url_z' in photo:
thumbnail_src = photo['url_z']
else:
thumbnail_src = img_src
url = build_flickr_url(photo['owner'], photo['id'])
title = photo['title']
content = '<span class="photo-author">' +\
photo['ownername'] +\
'</span><br />' +\
'<span class="description">' +\
photo['description']['_content'] +\
'</span>'
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'thumbnail_src': thumbnail_src,
'content': content,
'template': 'images.html'})
# return results
return results


@ -1,109 +0,0 @@
#!/usr/bin/env python
# Flickr (Images)
#
# @website https://www.flickr.com
# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
#
# @using-api no
# @results HTML
# @stable no
# @parse url, title, thumbnail, img_src
from urllib import urlencode
from json import loads
import re
from searx.engines import logger
logger = logger.getChild('flickr-noapi')
categories = ['images']
url = 'https://secure.flickr.com/'
search_url = url + 'search/?{query}&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
paging = True
def build_flickr_url(user_id, photo_id):
return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params):
params['url'] = search_url.format(query=urlencode({'text': query}),
page=params['pageno'])
return params
def response(resp):
results = []
matches = regex.search(resp.text)
if matches is None:
return results
match = matches.group(1)
search_results = loads(match)
if '_data' not in search_results:
return []
photos = search_results['_data']
for photo in photos:
# In paged configuration, the first pages' photos
# are represented by a None object
if photo is None:
continue
img_src = None
# From the biggest to the lowest format
for image_size in image_sizes:
if image_size in photo['sizes']:
img_src = photo['sizes'][image_size]['url']
break
if not img_src:
logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
continue
if 'id' not in photo['owner']:
continue
# For a bigger thumbnail, keep only the url_z, not the url_n
if 'n' in photo['sizes']:
thumbnail_src = photo['sizes']['n']['url']
elif 'z' in photo['sizes']:
thumbnail_src = photo['sizes']['z']['url']
else:
thumbnail_src = img_src
url = build_flickr_url(photo['owner']['id'], photo['id'])
title = photo.get('title', '')
content = '<span class="photo-author">' +\
photo['owner']['username'] +\
'</span><br />'
if 'description' in photo:
content = content +\
'<span class="description">' +\
photo['description'] +\
'</span>'
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'thumbnail_src': thumbnail_src,
'content': content,
'template': 'images.html'})
return results


@ -1,60 +0,0 @@
## General Files (Files)
#
# @website http://www.general-files.org
# @provide-api no (nothing found)
#
# @using-api no (because nothing found)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo detect torrents?
from lxml import html
# engine dependent config
categories = ['files']
paging = True
# search-url
base_url = 'http://www.general-file.com'
search_url = base_url + '/files-{letter}/{query}/{pageno}'
# specific xpath variables
result_xpath = '//table[@class="block-file"]'
title_xpath = './/h2/a//text()'
url_xpath = './/h2/a/@href'
content_xpath = './/p//text()'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=query,
letter=query[0],
pageno=params['pageno'])
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(result_xpath):
url = result.xpath(url_xpath)[0]
# skip fast download links
if not url.startswith('/'):
continue
# append result
results.append({'url': base_url + url,
'title': ''.join(result.xpath(title_xpath)),
'content': ''.join(result.xpath(content_xpath))})
# return results
return results


@ -1,59 +0,0 @@
## Github (It)
#
# @website https://github.com/
# @provide-api yes (https://developer.github.com/v3/)
#
# @using-api yes
# @results JSON
# @stable yes (using api)
# @parse url, title, content
from urllib import urlencode
from json import loads
from cgi import escape
# engine dependent config
categories = ['it']
# search-url
search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa
accept_header = 'application/vnd.github.preview.text-match+json'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
params['headers']['Accept'] = accept_header
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# check if items are received
if not 'items' in search_res:
return []
# parse results
for res in search_res['items']:
title = res['name']
url = res['html_url']
if res['description']:
content = escape(res['description'][:500])
else:
content = ''
# append result
results.append({'url': url,
'title': title,
'content': content})
# return results
return results


@ -1,140 +0,0 @@
# Google (Web)
#
# @website https://www.google.com
# @provide-api yes (https://developers.google.com/custom-search/)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content, suggestion
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html
from searx.poolrequests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
categories = ['general']
paging = True
language_support = True
# search-url
google_hostname = 'www.google.com'
search_path = '/search'
redirect_path = '/url'
images_path = '/images'
search_url = ('https://' +
google_hostname +
search_path +
'?{query}&start={offset}&gbv=1')
# specific xpath variables
results_xpath = '//li[@class="g"]'
url_xpath = './/h3/a/@href'
title_xpath = './/h3'
content_xpath = './/span[@class="st"]'
suggestion_xpath = '//p[@class="_Bmc"]'
images_xpath = './/div/a'
image_url_xpath = './@href'
image_img_src_xpath = './img/@src'
pref_cookie = ''
# see https://support.google.com/websearch/answer/873?hl=en
def get_google_pref_cookie():
global pref_cookie
if pref_cookie == '':
resp = get('https://www.google.com/ncr', allow_redirects=False)
pref_cookie = resp.cookies["PREF"]
return pref_cookie
# remove google-specific tracking-url
def parse_url(url_string):
parsed_url = urlparse(url_string)
if (parsed_url.netloc in [google_hostname, '']
and parsed_url.path == redirect_path):
query = dict(parse_qsl(parsed_url.query))
return query['q']
else:
return url_string
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].replace('_', '-').lower()
params['url'] = search_url.format(offset=offset,
query=urlencode({'q': query}))
params['headers']['Accept-Language'] = language
params['cookies']['PREF'] = get_google_pref_cookie()
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):
title = extract_text(result.xpath(title_xpath)[0])
try:
url = parse_url(extract_url(result.xpath(url_xpath), search_url))
parsed_url = urlparse(url)
if (parsed_url.netloc == google_hostname
and parsed_url.path == search_path):
# remove the link to google news
continue
# images result
if (parsed_url.netloc == google_hostname
and parsed_url.path == images_path):
# only thumbnail image provided,
# so skipping image results
# results = results + parse_images(result)
pass
else:
# normal result
content = extract_text(result.xpath(content_xpath)[0])
# append result
results.append({'url': url,
'title': title,
'content': content})
except:
continue
# parse suggestion
for suggestion in dom.xpath(suggestion_xpath):
# append suggestion
results.append({'suggestion': extract_text(suggestion)})
# return results
return results
def parse_images(result):
results = []
for image in result.xpath(images_xpath):
url = parse_url(extract_text(image.xpath(image_url_xpath)[0]))
img_src = extract_text(image.xpath(image_img_src_xpath)[0])
# append result
results.append({'url': url,
'title': '',
'content': '',
'img_src': img_src,
'template': 'images.html'})
return results
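Illustrative behaviour of parse_url() above, which strips Google's /url redirect wrapper and leaves other URLs untouched:

parse_url('https://www.google.com/url?q=https://example.org/&sa=U')   # -> 'https://example.org/'
parse_url('https://example.org/page')                                 # -> 'https://example.org/page'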


@ -1,68 +0,0 @@
## Google (Images)
#
# @website https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/),
# deprecated!
#
# @using-api yes
# @results JSON
# @stable yes (but deprecated)
# @parse url, title, img_src
from urllib import urlencode, unquote
from json import loads
# engine dependent config
categories = ['images']
paging = True
safesearch = True
# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe={safesearch}&filter=off&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 8
if params['safesearch'] == 0:
safesearch = 'off'
else:
safesearch = 'on'
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset,
safesearch=safesearch)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# return empty array if there are no results
if not search_res.get('responseData', {}).get('results'):
return []
# parse results
for result in search_res['responseData']['results']:
href = result['originalContextUrl']
title = result['title']
if 'url' not in result:
continue
thumbnail_src = result['tbUrl']
# append result
results.append({'url': href,
'title': title,
'content': result['content'],
'thumbnail_src': thumbnail_src,
'img_src': unquote(result['url']),
'template': 'images.html'})
# return results
return results


@ -1,65 +0,0 @@
## Google (News)
#
# @website https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/),
# deprecated!
#
# @using-api yes
# @results JSON
# @stable yes (but deprecated)
# @parse url, title, content, publishedDate
from urllib import urlencode
from json import loads
from dateutil import parser
# search-url
categories = ['news']
paging = True
language_support = True
# engine dependent config
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 8
language = 'en-US'
if params['language'] != 'all':
language = params['language'].replace('_', '-')
params['url'] = search_url.format(offset=offset,
query=urlencode({'q': query}),
lang=language)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# return empty array if there are no results
if not search_res.get('responseData', {}).get('results'):
return []
# parse results
for result in search_res['responseData']['results']:
# parse publishedDate
publishedDate = parser.parse(result['publishedDate'])
if 'url' not in result:
continue
# append result
results.append({'url': result['unescapedUrl'],
'title': result['titleNoFormatting'],
'publishedDate': publishedDate,
'content': result['content']})
# return results
return results


@ -1,87 +0,0 @@
from urllib import urlencode
from json import loads
from collections import Iterable
search_url = None
url_query = None
content_query = None
title_query = None
#suggestion_xpath = ''
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
else:
it = enumerate(iterable)
for index, value in it:
yield str(index), value
def is_iterable(obj):
if type(obj) == str:
return False
if type(obj) == unicode:
return False
return isinstance(obj, Iterable)
def parse(query):
q = []
for part in query.split('/'):
if part == '':
continue
else:
q.append(part)
return q
def do_query(data, q):
ret = []
if not q:
return ret
qkey = q[0]
for key, value in iterate(data):
if len(q) == 1:
if key == qkey:
ret.append(value)
elif is_iterable(value):
ret.extend(do_query(value, q))
else:
if not is_iterable(value):
continue
if key == qkey:
ret.extend(do_query(value, q[1:]))
else:
ret.extend(do_query(value, q))
return ret
def query(data, query_string):
q = parse(query_string)
return do_query(data, q)
def request(query, params):
query = urlencode({'q': query})[2:]
params['url'] = search_url.format(query=query)
params['query'] = query
return params
def response(resp):
results = []
json = loads(resp.text)
urls = query(json, url_query)
contents = query(json, content_query)
titles = query(json, title_query)
for url, title, content in zip(urls, titles, contents):
results.append({'url': url, 'title': title, 'content': content})
return results
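A worked example of the query()/do_query() helpers above, which walk a parsed JSON document along a '/'-separated key path (data below is illustrative):

data = {'items': [{'title': 'a', 'link': 'http://a'},
                  {'title': 'b', 'link': 'http://b'}]}
query(data, 'items/title')   # -> ['a', 'b']
query(data, 'items/link')    # -> ['http://a', 'http://b']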


@ -1,120 +0,0 @@
## Kickass Torrent (Videos, Music, Files)
#
# @website https://kickass.so
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
# search-url
url = 'https://kickass.to/'
search_url = url + 'search/{search_term}/{pageno}/'
# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
torrent_xpath = './/a[@title="Download torrent file"]'
content_xpath = './/span[@class="font11px lightgrey block"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(search_term=quote(query),
pageno=params['pageno'])
# FIX: SSLError: hostname 'kickass.so'
# doesn't match either of '*.kickass.to', 'kickass.to'
params['verify'] = False
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
search_res = dom.xpath('//table[@class="data"]//tr')
# return empty array if nothing is found
if not search_res:
return []
# parse results
for result in search_res[1:]:
link = result.xpath('.//a[@class="cellMainLink"]')[0]
href = urljoin(url, link.attrib['href'])
title = extract_text(link)
content = escape(extract_text(result.xpath(content_xpath)))
seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]
# convert seed to int if possible
if seed.isdigit():
seed = int(seed)
else:
seed = 0
# convert leech to int if possible
if leech.isdigit():
leech = int(leech)
else:
leech = 0
# convert filesize to byte if possible
try:
filesize = float(filesize)
# convert filesize to byte
if filesize_multiplier == 'TB':
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
elif filesize_multiplier == 'GB':
filesize = int(filesize * 1024 * 1024 * 1024)
elif filesize_multiplier == 'MB':
filesize = int(filesize * 1024 * 1024)
elif filesize_multiplier == 'KB':
filesize = int(filesize * 1024)
except:
filesize = None
# convert files to int if possible
if files.isdigit():
files = int(files)
else:
files = None
magnetlink = result.xpath(magnet_xpath)[0].attrib['href']
torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")
# append result
results.append({'url': href,
'title': title,
'content': content,
'seed': seed,
'leech': leech,
'filesize': filesize,
'files': files,
'magnetlink': magnetlink,
'torrentfile': torrentfileurl,
'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True)
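The seed/leech/filesize normalisation above is a chain of if/elif branches; the same unit conversion could be written table-driven. A hedged sketch, not what the engine ships (unknown units yield None here, whereas the original keeps the raw float):
# sketch: table-driven variant of the filesize conversion above
_multipliers = {'KB': 1024,
                'MB': 1024 ** 2,
                'GB': 1024 ** 3,
                'TB': 1024 ** 4}


def to_bytes(filesize, filesize_multiplier):
    try:
        return int(float(filesize) * _multipliers[filesize_multiplier])
    except (ValueError, KeyError):
        return None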

View File

@@ -1,81 +0,0 @@
## general mediawiki-engine (Web)
#
# @website websites built on mediawiki (https://www.mediawiki.org)
# @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title
#
# @todo content
from json import loads
from string import Formatter
from urllib import urlencode, quote
# engine dependent config
categories = ['general']
language_support = True
paging = True
number_of_results = 1
# search-url
base_url = 'https://{language}.wikipedia.org/'
search_url = base_url + 'w/api.php?action=query'\
'&list=search'\
'&{query}'\
'&srprop=timestamp'\
'&format=json'\
'&sroffset={offset}'\
'&srlimit={limit}' # noqa
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
string_args = dict(query=urlencode({'srsearch': query}),
offset=offset,
limit=number_of_results)
format_strings = list(Formatter().parse(base_url))
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('_')[0]
if len(format_strings) > 1:
string_args['language'] = language
# write search-language back to params, required in response
params['language'] = language
params['url'] = search_url.format(**string_args)
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if not search_results.get('query', {}).get('search'):
return []
# parse results
for result in search_results['query']['search']:
url = base_url.format(language=resp.search_params['language']) +\
'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
# append result
results.append({'url': url,
'title': result['title'],
'content': ''})
# return results
return results
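The Formatter().parse(base_url) call in request() is what detects whether base_url still contains a {language} placeholder: parse() splits a format string into (literal, field, spec, conversion) tuples, so a URL with one placeholder yields more than one tuple. A standalone sketch:
# sketch: placeholder detection via string.Formatter, as used in request()
from string import Formatter

with_placeholder = 'https://{language}.wikipedia.org/'
without_placeholder = 'https://www.mediawiki.org/'

print(len(list(Formatter().parse(with_placeholder))))     # 2 -> substitute language
print(len(list(Formatter().parse(without_placeholder))))  # 1 -> use the URL as-is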

View File

@@ -1,59 +0,0 @@
## Mixcloud (Music)
#
# @website https://www.mixcloud.com/
# @provide-api yes (http://www.mixcloud.com/developers/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, embedded, publishedDate
from json import loads
from urllib import urlencode
from dateutil import parser
# engine dependent config
categories = ['music']
paging = True
# search-url
url = 'http://api.mixcloud.com/'
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# parse results
for result in search_res.get('data', []):
title = result['name']
url = result['url']
content = result['user']['name']
embedded = embedded_url.format(url=url)
publishedDate = parser.parse(result['created_time'])
# append result
results.append({'url': url,
'title': title,
'embedded': embedded,
'publishedDate': publishedDate,
'content': content})
# return results
return results

View File

@@ -1,97 +0,0 @@
## OpenStreetMap (Map)
#
# @website https://openstreetmap.org/
# @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title
from json import loads
from searx.utils import searx_useragent
# engine dependent config
categories = ['map']
paging = False
# search-url
base_url = 'https://nominatim.openstreetmap.org/'
search_string = 'search/{query}?format=json&polygon_geojson=1&addressdetails=1'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# do search-request
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
return params
# get response from search-request
def response(resp):
results = []
json = loads(resp.text)
# parse results
for r in json:
if 'display_name' not in r:
continue
title = r['display_name']
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
osm_id=r['osm_id'])
osm = {'type': osm_type,
'id': r['osm_id']}
geojson = r.get('geojson')
# if no geojson is found and osm_type is a node, add geojson Point
if not geojson and osm_type == 'node':
geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address', {})
address = {}
# get name
if r['class'] == 'amenity' or\
r['class'] == 'shop' or\
r['class'] == 'tourism' or\
r['class'] == 'leisure':
if address_raw.get('address29'):
address = {'name': address_raw.get('address29')}
else:
address = {'name': address_raw.get(r['type'])}
# add the rest of the address data, if a name was already found
if address.get('name'):
address.update({'house_number': address_raw.get('house_number'),
'road': address_raw.get('road'),
'locality': address_raw.get('city',
address_raw.get('town', # noqa
address_raw.get('village'))), # noqa
'postcode': address_raw.get('postcode'),
'country': address_raw.get('country'),
'country_code': address_raw.get('country_code')})
else:
address = None
# append result
results.append({'template': 'map.html',
'title': title,
'content': '',
'longitude': r['lon'],
'latitude': r['lat'],
'boundingbox': r['boundingbox'],
'geojson': geojson,
'address': address,
'osm': osm,
'url': url})
# return results
return results
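For orientation, a hedged sketch of the fields response() actually reads from one Nominatim entry and what it builds from them (the values below are made up, not real Nominatim output):
# illustrative Nominatim entry covering only the keys response() touches
nominatim_entry = {
    'display_name': 'Example Cafe, 1, Example Road, Example Town',
    'osm_type': 'node',
    'osm_id': 123456,
    'lon': '13.3888',
    'lat': '52.5170',
    'boundingbox': ['52.5169', '52.5171', '13.3887', '13.3889'],
    'class': 'amenity',
    'type': 'cafe',
    'address': {'cafe': 'Example Cafe', 'road': 'Example Road',
                'house_number': '1', 'town': 'Example Town',
                'postcode': '10117', 'country': 'Germany',
                'country_code': 'de'},
}
# response() would map this to a 'map.html' result whose url is
# https://openstreetmap.org/node/123456, whose geojson is a Point built from
# lon/lat (no 'geojson' key and osm_type == 'node'), and whose address
# 'name' comes from address[type] because class == 'amenity'.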

View File

@@ -1,132 +0,0 @@
## Photon (Map)
#
# @website https://photon.komoot.de
# @provide-api yes (https://photon.komoot.de/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title
from urllib import urlencode
from json import loads
from searx.utils import searx_useragent
# engine dependent config
categories = ['map']
paging = False
language_support = True
number_of_results = 10
# search-url
base_url = 'https://photon.komoot.de/'
search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
allowed_languages = ['de', 'en', 'fr', 'it']
# do search-request
def request(query, params):
params['url'] = base_url +\
search_string.format(query=urlencode({'q': query}),
limit=number_of_results)
if params['language'] != 'all':
language = params['language'].split('_')[0]
if language in allowed_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
# FIX: SSLError: SSL3_GET_SERVER_CERTIFICATE:certificate verify failed
params['verify'] = False
return params
# get response from search-request
def response(resp):
results = []
json = loads(resp.text)
# parse results
for r in json.get('features', {}):
properties = r.get('properties')
if not properties:
continue
# get title
title = properties.get('name')
# get osm-type
if properties.get('osm_type') == 'N':
osm_type = 'node'
elif properties.get('osm_type') == 'W':
osm_type = 'way'
elif properties.get('osm_type') == 'R':
osm_type = 'relation'
else:
# continue if invalid osm-type
continue
url = result_base_url.format(osm_type=osm_type,
osm_id=properties.get('osm_id'))
osm = {'type': osm_type,
'id': properties.get('osm_id')}
geojson = r.get('geometry')
if properties.get('extent'):
boundingbox = [properties.get('extent')[3],
properties.get('extent')[1],
properties.get('extent')[0],
properties.get('extent')[2]]
else:
# TODO: better boundingbox calculation
boundingbox = [geojson['coordinates'][1],
geojson['coordinates'][1],
geojson['coordinates'][0],
geojson['coordinates'][0]]
# address calculation
address = {}
# get name
if properties.get('osm_key') == 'amenity' or\
properties.get('osm_key') == 'shop' or\
properties.get('osm_key') == 'tourism' or\
properties.get('osm_key') == 'leisure':
address = {'name': properties.get('name')}
# add the rest of the address data, if a name was already found
if address.get('name'):
address.update({'house_number': properties.get('housenumber'),
'road': properties.get('street'),
'locality': properties.get('city',
properties.get('town', # noqa
properties.get('village'))), # noqa
'postcode': properties.get('postcode'),
'country': properties.get('country')})
else:
address = None
# append result
results.append({'template': 'map.html',
'title': title,
'content': '',
'longitude': geojson['coordinates'][0],
'latitude': geojson['coordinates'][1],
'boundingbox': boundingbox,
'geojson': geojson,
'address': address,
'osm': osm,
'url': url})
# return results
return results

View File

@@ -1,94 +0,0 @@
## Piratebay (Videos, Music, Files)
#
# @website https://thepiratebay.se
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
# search-url
url = 'https://thepiratebay.se/'
search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'
# piratebay specific type-definitions
search_types = {'files': '0',
'music': '100',
'videos': '200'}
# specific xpath variables
magnet_xpath = './/a[@title="Download this torrent using magnet"]'
torrent_xpath = './/a[@title="Download this torrent"]'
content_xpath = './/font[@class="detDesc"]'
# do search-request
def request(query, params):
search_type = search_types.get(params['category'], '0')
params['url'] = search_url.format(search_term=quote(query),
search_type=search_type,
pageno=params['pageno'] - 1)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
search_res = dom.xpath('//table[@id="searchResult"]//tr')
# return empty array if nothing is found
if not search_res:
return []
# parse results
for result in search_res[1:]:
link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = extract_text(link)
content = escape(extract_text(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
# convert seed to int if possible
if seed.isdigit():
seed = int(seed)
else:
seed = 0
# convert leech to int if possible
if leech.isdigit():
leech = int(leech)
else:
leech = 0
magnetlink = result.xpath(magnet_xpath)[0]
torrentfile = result.xpath(torrent_xpath)[0]
# append result
results.append({'url': href,
'title': title,
'content': content,
'seed': seed,
'leech': leech,
'magnetlink': magnetlink.attrib.get('href'),
'torrentfile': torrentfile.attrib.get('href'),
'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True)

View File

@@ -1,68 +0,0 @@
## Searchcode (It)
#
# @website https://searchcode.com/
# @provide-api yes (https://searchcode.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content
from urllib import urlencode
from json import loads
# engine dependent config
categories = ['it']
paging = True
# search-url
url = 'https://searchcode.com/'
search_url = url+'api/codesearch_I/?{query}&p={pageno}'
# file extensions whose language cannot be read off the extension itself
code_endings = {'cs': 'c#',
'h': 'c',
'hpp': 'cpp',
'cxx': 'cpp'}
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno']-1)
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# parse results
for result in search_results.get('results', []):
href = result['url']
title = "" + result['name'] + " - " + result['filename']
repo = result['repo']
lines = dict()
for line, code in result['lines'].items():
lines[int(line)] = code
code_language = code_endings.get(
result['filename'].split('.')[-1].lower(),
result['filename'].split('.')[-1].lower())
# append result
results.append({'url': href,
'title': title,
'content': '',
'repository': repo,
'codelines': sorted(lines.items()),
'code_language': code_language,
'template': 'code.html'})
# return results
return results
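The code_endings table only overrides extensions whose language name differs from the extension; everything else falls back to the lowercased extension, as this small sketch shows:
# sketch: language lookup used for result['filename'] above
for filename in ('Program.cs', 'util.hpp', 'main.py'):
    ext = filename.split('.')[-1].lower()
    print(filename + ' -> ' + code_endings.get(ext, ext))  # c#, cpp, py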

View File

@@ -1,56 +0,0 @@
## Searchcode (It)
#
# @website https://searchcode.com/
# @provide-api yes (https://searchcode.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content
from urllib import urlencode
from json import loads
# engine dependent config
categories = ['it']
paging = True
# search-url
url = 'https://searchcode.com/'
search_url = url+'api/search_IV/?{query}&p={pageno}'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno']-1)
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# parse results
for result in search_results.get('results', []):
href = result['url']
title = "[" + result['type'] + "] " +\
result['namespace'] +\
" " + result['name']
content = '<span class="highlight">[' +\
result['type'] + "] " +\
result['name'] + " " +\
result['synopsis'] +\
"</span><br />" +\
result['description']
# append result
results.append({'url': href,
'title': title,
'content': content})
# return results
return results

View File

@@ -1,70 +0,0 @@
## Soundcloud (Music)
#
# @website https://soundcloud.com
# @provide-api yes (https://developers.soundcloud.com/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, embedded
from json import loads
from urllib import urlencode, quote_plus
from dateutil import parser
# engine dependent config
categories = ['music']
paging = True
# api-key
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
# search-url
url = 'https://api.soundcloud.com/'
search_url = url + 'search?{query}'\
'&facet=model'\
'&limit=20'\
'&offset={offset}'\
'&linked_partitioning=1'\
'&client_id={client_id}' # noqa
embedded_url = '<iframe width="100%" height="166" ' +\
'scrolling="no" frameborder="no" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 20
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset,
client_id=guest_client_id)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# parse results
for result in search_res.get('collection', []):
if result['kind'] in ('track', 'playlist'):
title = result['title']
content = result['description']
publishedDate = parser.parse(result['last_modified'])
uri = quote_plus(result['uri'])
embedded = embedded_url.format(uri=uri)
# append result
results.append({'url': result['permalink_url'],
'title': title,
'publishedDate': publishedDate,
'embedded': embedded,
'content': content})
# return results
return results

View File

@@ -1,58 +0,0 @@
## Stackoverflow (It)
#
# @website https://stackoverflow.com/
# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content
from urlparse import urljoin
from cgi import escape
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['it']
paging = True
# search-url
url = 'http://stackoverflow.com/'
search_url = url+'search?{query}&page={pageno}'
# specific xpath variables
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
content_xpath = './/div[@class="excerpt"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'])
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):
link = result.xpath(link_xpath)[0]
href = urljoin(url, link.attrib.get('href'))
title = escape(extract_text(link))
content = escape(extract_text(result.xpath(content_xpath)))
# append result
results.append({'url': href,
'title': title,
'content': content})
# return results
return results

View File

@@ -1,85 +0,0 @@
# Startpage (Web)
#
# @website https://startpage.com
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo paging
from lxml import html
from cgi import escape
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
# there is a mechanism to block "bot" searches
# (probably the parameter qid); it requires
# storing qids between multiple search calls
# paging = False
language_support = True
# search-url
base_url = 'https://startpage.com/'
search_url = base_url + 'do/search'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct children of div[@id="results"]
results_xpath = '//div[@class="result"]'
link_xpath = './/h3/a'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = search_url
params['method'] = 'POST'
params['data'] = {'query': query,
'startat': offset}
# set language if specified
if params['language'] != 'all':
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.content)
# parse results
for result in dom.xpath(results_xpath):
links = result.xpath(link_xpath)
if not links:
continue
link = links[0]
url = link.attrib.get('href')
# block google-ad url's
if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
continue
title = escape(extract_text(link))
if result.xpath('./p[@class="desc"]'):
content = escape(extract_text(result.xpath('./p[@class="desc"]')))
else:
content = ''
# append result
results.append({'url': url,
'title': title,
'content': content})
# return results
return results

View File

@@ -1,79 +0,0 @@
## Subtitleseeker (Video)
#
# @website http://www.subtitleseeker.com
# @provide-api no
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content
from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos']
paging = True
language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url + 'search/TITLES/{query}&p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=quote_plus(query),
pageno=params['pageno'])
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
search_lang = ""
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
for lc in language_codes
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
# parse results
for result in dom.xpath(results_xpath):
link = result.xpath(".//a")[0]
href = link.attrib.get('href')
if language != "":
href = href + language + '/'
elif search_lang:
href = href + search_lang + '/'
title = escape(extract_text(link))
content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
content = content + " - "
text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
content = content + text
if result.xpath(".//span") != []:
content = content +\
" - (" +\
extract_text(result.xpath(".//span")) +\
")"
# append result
results.append({'url': href,
'title': title,
'content': escape(content)})
# return results
return results

View File

@@ -1,77 +0,0 @@
## Twitter (Social media)
#
# @website https://twitter.com/
# @provide-api yes (https://dev.twitter.com/docs/using-search)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo publishedDate
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from datetime import datetime
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['social media']
language_support = True
# search-url
base_url = 'https://twitter.com/'
search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]'
content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
# do search-request
def request(query, params):
params['url'] = search_url + urlencode({'q': query})
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
else:
params['cookies']['lang'] = 'en'
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for tweet in dom.xpath(results_xpath):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
title = extract_text(tweet.xpath(title_xpath))
content = extract_text(tweet.xpath(content_xpath)[0])
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
timestamp = float(pubdate[0].attrib.get('data-time'))
publishedDate = datetime.fromtimestamp(timestamp, None)
# append result
results.append({'url': url,
'title': title,
'content': content,
'publishedDate': publishedDate})
else:
# append result
results.append({'url': url,
'title': title,
'content': content})
# return results
return results

View File

@@ -1,75 +0,0 @@
# Vimeo (Videos)
#
# @website https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api),
# they have a maximum count of queries/hour
#
# @using-api no (TODO, rewrite to api)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, publishedDate, thumbnail, embedded
#
# @todo rewrite to api
# @todo set content-parameter with correct data
from urllib import urlencode
from lxml import html
from HTMLParser import HTMLParser
from searx.engines.xpath import extract_text
from dateutil import parser
# engine dependent config
categories = ['videos']
paging = True
# search-url
base_url = 'http://vimeo.com'
search_url = base_url + '/search/page:{pageno}?{query}'
# specific xpath variables
results_xpath = '//div[@id="browse_content"]/ol/li'
url_xpath = './a/@href'
title_xpath = './a/div[@class="data"]/p[@class="title"]'
content_xpath = './a/img/@src'
publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
'width="540" height="304" frameborder="0" ' +\
'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
# do search-request
def request(query, params):
params['url'] = search_url.format(pageno=params['pageno'],
query=urlencode({'q': query}))
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
p = HTMLParser()
# parse results
for result in dom.xpath(results_xpath):
videoid = result.xpath(url_xpath)[0]
url = base_url + videoid
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
embedded = embedded_url.format(videoid=videoid)
# append result
results.append({'url': url,
'title': title,
'content': '',
'template': 'videos.html',
'publishedDate': publishedDate,
'embedded': embedded,
'thumbnail': thumbnail})
# return results
return results

View File

@@ -1,305 +0,0 @@
import json
from urllib import urlencode
from searx.poolrequests import get
from searx.utils import format_date_by_locale
result_count = 1
wikidata_host = 'https://www.wikidata.org'
wikidata_api = wikidata_host + '/w/api.php'
url_search = wikidata_api \
+ '?action=query&list=search&format=json'\
+ '&srnamespace=0&srprop=sectiontitle&{query}'
url_detail = wikidata_api\
+ '?action=wbgetentities&format=json'\
+ '&props=labels%7Cinfo%7Csitelinks'\
+ '%7Csitelinks%2Furls%7Cdescriptions%7Cclaims'\
+ '&{query}'
url_map = 'https://www.openstreetmap.org/'\
+ '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
def request(query, params):
params['url'] = url_search.format(
query=urlencode({'srsearch': query,
'srlimit': result_count}))
return params
def response(resp):
results = []
search_res = json.loads(resp.text)
wikidata_ids = set()
for r in search_res.get('query', {}).get('search', {}):
wikidata_ids.add(r.get('title', ''))
language = resp.search_params['language'].split('_')[0]
if language == 'all':
language = 'en'
url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
'languages': language + '|en'}))
htmlresponse = get(url)
jsonresponse = json.loads(htmlresponse.content)
for wikidata_id in wikidata_ids:
results = results + getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
return results
def getDetail(jsonresponse, wikidata_id, language, locale):
results = []
urls = []
attributes = []
result = jsonresponse.get('entities', {}).get(wikidata_id, {})
title = result.get('labels', {}).get(language, {}).get('value', None)
if title is None:
title = result.get('labels', {}).get('en', {}).get('value', None)
if title is None:
return results
description = result\
.get('descriptions', {})\
.get(language, {})\
.get('value', None)
if description is None:
description = result\
.get('descriptions', {})\
.get('en', {})\
.get('value', '')
claims = result.get('claims', {})
official_website = get_string(claims, 'P856', None)
if official_website is not None:
urls.append({'title': 'Official site', 'url': official_website})
results.append({'title': title, 'url': official_website})
wikipedia_link_count = 0
if language != 'en':
wikipedia_link_count += add_url(urls,
'Wikipedia (' + language + ')',
get_wikilink(result, language +
'wiki'))
wikipedia_en_link = get_wikilink(result, 'enwiki')
wikipedia_link_count += add_url(urls,
'Wikipedia (en)',
wikipedia_en_link)
if wikipedia_link_count == 0:
misc_language = get_wiki_firstlanguage(result, 'wiki')
if misc_language is not None:
add_url(urls,
'Wikipedia (' + misc_language + ')',
get_wikilink(result, misc_language + 'wiki'))
if language != 'en':
add_url(urls,
'Wiki voyage (' + language + ')',
get_wikilink(result, language + 'wikivoyage'))
add_url(urls,
'Wiki voyage (en)',
get_wikilink(result, 'enwikivoyage'))
if language != 'en':
add_url(urls,
'Wikiquote (' + language + ')',
get_wikilink(result, language + 'wikiquote'))
add_url(urls,
'Wikiquote (en)',
get_wikilink(result, 'enwikiquote'))
add_url(urls,
'Commons wiki',
get_wikilink(result, 'commonswiki'))
add_url(urls,
'Location',
get_geolink(claims, 'P625', None))
add_url(urls,
'Wikidata',
'https://www.wikidata.org/wiki/'
+ wikidata_id + '?uselang=' + language)
musicbrainz_work_id = get_string(claims, 'P435')
if musicbrainz_work_id is not None:
add_url(urls,
'MusicBrainz',
'http://musicbrainz.org/work/'
+ musicbrainz_work_id)
musicbrainz_artist_id = get_string(claims, 'P434')
if musicbrainz_artist_id is not None:
add_url(urls,
'MusicBrainz',
'http://musicbrainz.org/artist/'
+ musicbrainz_artist_id)
musicbrainz_release_group_id = get_string(claims, 'P436')
if musicbrainz_release_group_id is not None:
add_url(urls,
'MusicBrainz',
'http://musicbrainz.org/release-group/'
+ musicbrainz_release_group_id)
musicbrainz_label_id = get_string(claims, 'P966')
if musicbrainz_label_id is not None:
add_url(urls,
'MusicBrainz',
'http://musicbrainz.org/label/'
+ musicbrainz_label_id)
# musicbrainz_area_id = get_string(claims, 'P982')
# P1407 MusicBrainz series ID
# P1004 MusicBrainz place ID
# P1330 MusicBrainz instrument ID
# P1407 MusicBrainz series ID
postal_code = get_string(claims, 'P281', None)
if postal_code is not None:
attributes.append({'label': 'Postal code(s)', 'value': postal_code})
date_of_birth = get_time(claims, 'P569', None)
if date_of_birth is not None:
date_of_birth = format_date_by_locale(date_of_birth[8:], locale)
attributes.append({'label': 'Date of birth', 'value': date_of_birth})
date_of_death = get_time(claims, 'P570', None)
if date_of_death is not None:
date_of_death = format_date_by_locale(date_of_death[8:], locale)
attributes.append({'label': 'Date of death', 'value': date_of_death})
if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
results.append({
'url': urls[0]['url'],
'title': title,
'content': description
})
else:
results.append({
'infobox': title,
'id': wikipedia_en_link,
'content': description,
'attributes': attributes,
'urls': urls
})
return results
def add_url(urls, title, url):
if url is not None:
urls.append({'title': title, 'url': url})
return 1
else:
return 0
def get_mainsnak(claims, propertyName):
propValue = claims.get(propertyName, {})
if len(propValue) == 0:
return None
propValue = propValue[0].get('mainsnak', None)
return propValue
def get_string(claims, propertyName, defaultValue=None):
propValue = claims.get(propertyName, {})
if len(propValue) == 0:
return defaultValue
result = []
for e in propValue:
mainsnak = e.get('mainsnak', {})
datavalue = mainsnak.get('datavalue', {})
if datavalue is not None:
result.append(datavalue.get('value', ''))
if len(result) == 0:
return defaultValue
else:
# TODO handle multiple urls
return result[0]
def get_time(claims, propertyName, defaultValue=None):
propValue = claims.get(propertyName, {})
if len(propValue) == 0:
return defaultValue
result = []
for e in propValue:
mainsnak = e.get('mainsnak', {})
datavalue = mainsnak.get('datavalue', {})
if datavalue is not None:
value = datavalue.get('value', '')
result.append(value.get('time', ''))
if len(result) == 0:
return defaultValue
else:
return ', '.join(result)
def get_geolink(claims, propertyName, defaultValue=''):
mainsnak = get_mainsnak(claims, propertyName)
if mainsnak is None:
return defaultValue
datatype = mainsnak.get('datatype', '')
datavalue = mainsnak.get('datavalue', {})
if datatype != 'globe-coordinate':
return defaultValue
value = datavalue.get('value', {})
precision = value.get('precision', 0.0002)
# there is no zoom information, deduce from precision (error prone)
# samples :
# 13 --> 5
# 1 --> 6
# 0.016666666666667 --> 9
# 0.00027777777777778 --> 19
# wolframalpha :
# quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
# 14.1186-8.8322 x+0.625447 x^2
if precision < 0.0003:
zoom = 19
else:
zoom = int(15 - precision*8.8322 + precision*precision*0.625447)
url = url_map\
.replace('{latitude}', str(value.get('latitude', 0)))\
.replace('{longitude}', str(value.get('longitude', 0)))\
.replace('{zoom}', str(zoom))
return url
def get_wikilink(result, wikiid):
url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
if url is None:
return url
elif url.startswith('http://'):
url = url.replace('http://', 'https://')
elif url.startswith('//'):
url = 'https:' + url
return url
def get_wiki_firstlanguage(result, wikipatternid):
for k in result.get('sitelinks', {}).keys():
if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)):
return k[0:2]
return None
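To make the zoom heuristic in get_geolink() concrete, here is the same formula as a standalone sketch, evaluated at the sample precisions listed in its comment (the fit is approximate, as the comment itself warns):
# sketch: the precision -> zoom heuristic used in get_geolink()
def zoom_from_precision(precision):
    if precision < 0.0003:
        return 19
    return int(15 - precision * 8.8322 + precision * precision * 0.625447)

for precision in (13, 1, 0.016666666666667, 0.00027777777777778):
    print('%s -> %s' % (precision, zoom_from_precision(precision)))
# roughly: 13 -> 5, 1 -> 6, 0.0166... -> 14, 0.00027... -> 19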

View File

@@ -1,82 +0,0 @@
## 1x (Images)
#
# @website http://1x.com/
# @provide-api no
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, thumbnail, img_src, content
from urllib import urlencode
from urlparse import urljoin
from lxml import html
import string
import re
# engine dependent config
categories = ['images']
paging = False
# search-url
base_url = 'http://1x.com'
search_url = base_url+'/backend/search.php?{query}'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
# get response from search-request
def response(resp):
results = []
# get links from result-text
regex = re.compile('(</a>|<a)')
results_parts = re.split(regex, resp.text)
cur_element = ''
# iterate over link parts
for result_part in results_parts:
# processed start and end of link
if result_part == '<a':
cur_element = result_part
continue
elif result_part != '</a>':
cur_element += result_part
continue
cur_element += result_part
# fix xml-error
cur_element = string.replace(cur_element, '"></a>', '"/></a>')
dom = html.fromstring(cur_element)
link = dom.xpath('//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
title = link.attrib.get('title', '')
thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
# TODO: get image with higher resolution
img_src = thumbnail_src
# check if url is showing to a photo
if '/photo/' not in url:
continue
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'content': '',
'thumbnail_src': thumbnail_src,
'template': 'images.html'})
# return results
return results

View File

@@ -1,64 +0,0 @@
## 500px (Images)
#
# @website https://500px.com
# @provide-api yes (https://developers.500px.com/)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, thumbnail, img_src, content
#
# @todo rewrite to api
from urllib import urlencode
from urlparse import urljoin
from lxml import html
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['images']
paging = True
# search-url
base_url = 'https://500px.com'
search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
# do search-request
def request(query, params):
params['url'] = search_url.format(pageno=params['pageno'],
query=urlencode({'q': query}))
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
regex = re.compile('3\.jpg.*$')
# parse results
for result in dom.xpath('//div[@class="photo"]'):
link = result.xpath('.//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
title = extract_text(result.xpath('.//div[@class="title"]'))
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
# To have a bigger thumbnail, uncomment the next line
# thumbnail_src = regex.sub('4.jpg', thumbnail_src)
content = extract_text(result.xpath('.//div[@class="info"]'))
img_src = regex.sub('2048.jpg', thumbnail_src)
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'content': content,
'thumbnail_src': thumbnail_src,
'template': 'images.html'})
# return results
return results

View File

@@ -1,106 +0,0 @@
from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text
search_url = None
url_xpath = None
content_xpath = None
title_xpath = None
suggestion_xpath = ''
results_xpath = ''
'''
if xpath_results is list, extract the text from each result and concat the list
if xpath_results is a xml element, extract all the text node from it
( text_content() method from lxml )
if xpath_results is a string element, then it's already done
'''
def extract_text(xpath_results):
if type(xpath_results) == list:
# it's a list of results: concatenate everything using recursive calls
if not xpath_results:
raise Exception('Empty url resultset')
result = ''
for e in xpath_results:
result = result + extract_text(e)
return result.strip()
elif type(xpath_results) in [_ElementStringResult, _ElementUnicodeResult]:
# it's a string
return ''.join(xpath_results)
else:
# it's an element
return html_to_text(xpath_results.text_content()).strip()
def extract_url(xpath_results, search_url):
url = extract_text(xpath_results)
if url.startswith('//'):
# add http or https to this kind of url //example.com/
parsed_search_url = urlparse(search_url)
url = parsed_search_url.scheme+url
elif url.startswith('/'):
# fix relative url to the search engine
url = urljoin(search_url, url)
# normalize url
url = normalize_url(url)
return url
def normalize_url(url):
parsed_url = urlparse(url)
# add a / at the end of the url if there is no path
if not parsed_url.netloc:
raise Exception('Cannot parse url')
if not parsed_url.path:
url += '/'
# FIXME : hack for yahoo
if parsed_url.hostname == 'search.yahoo.com'\
and parsed_url.path.startswith('/r'):
p = parsed_url.path
mark = p.find('/**')
if mark != -1:
return unquote(p[mark+3:]).decode('utf-8')
return url
def request(query, params):
query = urlencode({'q': query})[2:]
params['url'] = search_url.format(query=query)
params['query'] = query
return params
def response(resp):
results = []
dom = html.fromstring(resp.text)
if results_xpath:
for result in dom.xpath(results_xpath):
url = extract_url(result.xpath(url_xpath), search_url)
title = extract_text(result.xpath(title_xpath)[0])
content = extract_text(result.xpath(content_xpath)[0])
results.append({'url': url, 'title': title, 'content': content})
else:
for url, title, content in zip(
(extract_url(x, search_url) for
x in dom.xpath(url_xpath)),
map(extract_text, dom.xpath(title_xpath)),
map(extract_text, dom.xpath(content_xpath))
):
results.append({'url': url, 'title': title, 'content': content})
if not suggestion_xpath:
return results
for suggestion in dom.xpath(suggestion_xpath):
results.append({'suggestion': extract_text(suggestion)})
return results
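A couple of hedged examples of the URL handling above (example.com is just illustrative): normalize_url() can be called directly, while extract_url() first resolves scheme-relative and site-relative results against the search URL.
# sketch: what normalize_url() does with the shapes it special-cases
print(normalize_url('http://example.com'))        # 'http://example.com/' (path added)
print(normalize_url('https://example.com/page'))  # unchanged

# extract_url(['//example.com/x'], 'https://host.example/search?q=...') would
# prepend the search URL's scheme; extract_url(['/x'], ...) would urljoin()
# onto the search URL; both go through normalize_url() afterwards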

View File

@@ -1,97 +0,0 @@
## Yacy (Web, Images, Videos, Music, Files)
#
# @website http://yacy.net
# @provide-api yes
# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse (general) url, title, content, publishedDate
# @parse (images) url, title, img_src
#
# @todo parse video, audio and file results
from json import loads
from urllib import urlencode
from dateutil import parser
# engine dependent config
categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
paging = True
language_support = True
number_of_results = 5
# search-url
base_url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}'\
'&startRecord={offset}'\
'&maximumRecords={limit}'\
'&contentdom={search_type}'\
'&resource=global'
# yacy specific type-definitions
search_types = {'general': 'text',
'images': 'image',
'files': 'app',
'music': 'audio',
'videos': 'video'}
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
search_type = search_types.get(params.get('category'), '0')
params['url'] = base_url +\
search_url.format(query=urlencode({'query': query}),
offset=offset,
limit=number_of_results,
search_type=search_type)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('_')[0]
return params
# get response from search-request
def response(resp):
results = []
raw_search_results = loads(resp.text)
# return empty array if there are no results
if not raw_search_results:
return []
search_results = raw_search_results.get('channels', [])
if len(search_results) == 0:
return []
for result in search_results[0].get('items', []):
# parse image results
if result.get('image'):
# append result
results.append({'url': result['url'],
'title': result['title'],
'content': '',
'img_src': result['image'],
'template': 'images.html'})
# parse general results
else:
publishedDate = parser.parse(result['pubDate'])
# append result
results.append({'url': result['link'],
'title': result['title'],
'content': result['description'],
'publishedDate': publishedDate})
# TODO parse video, audio and file results
# return results
return results

View File

@@ -1,103 +0,0 @@
## Yahoo (Web)
#
# @website https://search.yahoo.com/web
# @provide-api yes (https://developer.yahoo.com/boss/search/),
# $0.80/1000 queries
#
# @using-api no (because pricing)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, suggestion
from urllib import urlencode
from urlparse import unquote
from lxml import html
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
categories = ['general']
paging = True
language_support = True
# search-url
base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
# specific xpath variables
results_xpath = '//div[@class="res"]'
url_xpath = './/h3/a/@href'
title_xpath = './/h3/a'
content_xpath = './/div[@class="abstr"]'
suggestion_xpath = '//div[@id="satat"]//a'
# remove yahoo-specific tracking-url
def parse_url(url_string):
endings = ['/RS', '/RK']
endpositions = []
start = url_string.find('http', url_string.find('/RU=') + 1)
for ending in endings:
endpos = url_string.rfind(ending)
if endpos > -1:
endpositions.append(endpos)
if start == 0 or len(endpositions) == 0:
return url_string
else:
end = min(endpositions)
return unquote(url_string[start:end])
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('_')[0]
params['url'] = base_url + search_url.format(offset=offset,
query=urlencode({'p': query}),
lang=language)
# TODO required?
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
.format(lang=language)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):
try:
url = parse_url(extract_url(result.xpath(url_xpath), search_url))
title = extract_text(result.xpath(title_xpath)[0])
except:
continue
content = extract_text(result.xpath(content_xpath)[0])
# append result
results.append({'url': url,
'title': title,
'content': content})
# if no suggestion found, return results
if not dom.xpath(suggestion_xpath):
return results
# parse suggestion
for suggestion in dom.xpath(suggestion_xpath):
# append suggestion
results.append({'suggestion': extract_text(suggestion)})
# return results
return results
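To illustrate parse_url() above: Yahoo wraps the real target between an /RU= marker and trailing /RK=... and /RS=... segments, and the helper slices that out and unquotes it. The wrapper below is constructed to match that shape and is not a captured Yahoo URL:
# illustrative tracking wrapper of the /RU=<quoted target>/RK=/RS= shape
wrapped = ('https://r.search.yahoo.com/_ylt=abc/RU='
           'http%3a%2f%2fexample.com%2fpage/RK=0/RS=xyz')
print(parse_url(wrapped))  # http://example.com/page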

View File

@@ -1,93 +0,0 @@
# Yahoo (News)
#
# @website https://news.yahoo.com
# @provide-api yes (https://developer.yahoo.com/boss/search/)
# $0.80/1000 queries
#
# @using-api no (because pricing)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url
from datetime import datetime, timedelta
import re
from dateutil import parser
# engine dependent config
categories = ['news']
paging = True
language_support = True
# search-url
search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}' # noqa
# specific xpath variables
results_xpath = '//div[@class="res"]'
url_xpath = './/h3/a/@href'
title_xpath = './/h3/a'
content_xpath = './/div[@class="abstr"]'
publishedDate_xpath = './/span[@class="timestamp"]'
suggestion_xpath = '//div[@id="satat"]//a'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('_')[0]
params['url'] = search_url.format(offset=offset,
query=urlencode({'p': query}),
lang=language)
# TODO required?
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
.format(lang=language)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):
url = parse_url(extract_url(result.xpath(url_xpath), search_url))
title = extract_text(result.xpath(title_xpath)[0])
content = extract_text(result.xpath(content_xpath)[0])
# parse publishedDate
publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])
if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group())) # noqa
else:
if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$",
publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate)
publishedDate = datetime.now()\
- timedelta(hours=int(timeNumbers[0]))\
- timedelta(minutes=int(timeNumbers[1]))
else:
publishedDate = parser.parse(publishedDate)
if publishedDate.year == 1900:
publishedDate = publishedDate.replace(year=datetime.now().year)
# append result
results.append({'url': url,
'title': title,
'content': content,
'publishedDate': publishedDate})
# return results
return results
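The publishedDate handling above covers three timestamp shapes; the same branching as a standalone sketch (the sample strings are illustrative):
# sketch: the three timestamp shapes handled in response() above
import re
from datetime import datetime, timedelta
from dateutil import parser


def parse_published(text):
    if re.match("^[0-9]+ minute(s|) ago$", text):
        return datetime.now() - timedelta(minutes=int(re.match(r'\d+', text).group()))
    if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", text):
        hours, minutes = re.findall(r'\d+', text)
        return datetime.now() - timedelta(hours=int(hours), minutes=int(minutes))
    date = parser.parse(text)
    if date.year == 1900:  # no year parsed; the engine patches in the current year
        date = date.replace(year=datetime.now().year)
    return date


print(parse_published('12 minutes ago'))
print(parse_published('1 hour, 23 minutes ago'))
print(parse_published('Sat, 03 May 2014 10:05:00 GMT'))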

View File

@@ -1,93 +0,0 @@
## Youtube (Videos)
#
# @website https://www.youtube.com/
# @provide-api yes (http://gdata-samples-youtube-search-py.appspot.com/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, thumbnail, embedded
from json import loads
from urllib import urlencode
from dateutil import parser
# engine dependent config
categories = ['videos', 'music']
paging = True
language_support = True
# search-url
base_url = 'https://gdata.youtube.com/feeds/api/videos'
search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5'
embedded_url = '<iframe width="540" height="304" ' +\
'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
'frameborder="0" allowfullscreen></iframe>'
# do search-request
def request(query, params):
index = (params['pageno'] - 1) * 5 + 1
params['url'] = search_url.format(query=urlencode({'q': query}),
index=index)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=' + params['language'].split('_')[0]
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if 'feed' not in search_results:
return []
feed = search_results['feed']
# parse results
for result in feed['entry']:
url = [x['href'] for x in result['link'] if x['type'] == 'text/html']
if not url:
continue
# remove tracking
url = url[0].replace('feature=youtube_gdata', '')
if url.endswith('&'):
url = url[:-1]
# the prefix 'https://www.youtube.com/watch?v=' is 32 characters long
videoid = url[32:]
title = result['title']['$t']
content = ''
thumbnail = ''
pubdate = result['published']['$t']
publishedDate = parser.parse(pubdate)
if 'media$thumbnail' in result['media$group']:
thumbnail = result['media$group']['media$thumbnail'][0]['url']
content = result['content']['$t']
embedded = embedded_url.format(videoid=videoid)
# append result
results.append({'url': url,
'title': title,
'content': content,
'template': 'videos.html',
'publishedDate': publishedDate,
'embedded': embedded,
'thumbnail': thumbnail})
# return results
return results

View File

@@ -1,209 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import re
from urlparse import urlparse
from lxml import etree
from os import listdir
from os.path import isfile, isdir, join
from searx import logger
logger = logger.getChild("https_rewrite")
# https://gitweb.torproject.org/\
# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
# HTTPS rewrite rules
https_rules = []
# load single ruleset from a xml file
def load_single_https_ruleset(filepath):
ruleset = ()
# init parser
parser = etree.XMLParser()
# load and parse xml-file
try:
tree = etree.parse(filepath, parser)
except:
# TODO, error message
return ()
# get root node
root = tree.getroot()
# check if root is a node with the name ruleset
# TODO improve parsing
if root.tag != 'ruleset':
return ()
# check if rule is deactivated by default
if root.attrib.get('default_off'):
return ()
# check if rule does only work for specific platforms
if root.attrib.get('platform'):
return ()
hosts = []
rules = []
exclusions = []
# parse children from ruleset
for ruleset in root:
# this child defines a target
if ruleset.tag == 'target':
# check if required tags available
if not ruleset.attrib.get('host'):
continue
# convert host-rule to valid regex
host = ruleset.attrib.get('host')\
.replace('.', '\.').replace('*', '.*')
# append to host list
hosts.append(host)
# this child defines a rule
elif ruleset.tag == 'rule':
# check if required tags available
if not ruleset.attrib.get('from')\
or not ruleset.attrib.get('to'):
continue
# TODO hack which converts a javascript regex group
# into a valid python regex group
rule_from = ruleset.attrib['from'].replace('$', '\\')
if rule_from.endswith('\\'):
rule_from = rule_from[:-1]+'$'
rule_to = ruleset.attrib['to'].replace('$', '\\')
if rule_to.endswith('\\'):
rule_to = rule_to[:-1]+'$'
# TODO, not working yet because of the hack above,
# currently doing that in webapp.py
# rule_from_rgx = re.compile(rule_from, re.I)
# append rule
try:
rules.append((re.compile(rule_from, re.I | re.U), rule_to))
except:
# TODO log regex error
continue
# this child defines an exclusion
elif ruleset.tag == 'exclusion':
# check if required tags available
if not ruleset.attrib.get('pattern'):
continue
exclusion_rgx = re.compile(ruleset.attrib.get('pattern'))
# append exclusion
exclusions.append(exclusion_rgx)
# convert list of possible hosts to a simple regex
# TODO compress regex to improve performance
try:
target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U)
except:
return ()
# return ruleset
return (target_hosts, rules, exclusions)
# load all https rewrite rules
def load_https_rules(rules_path):
# check if directory exists
if not isdir(rules_path):
logger.error("directory not found: '" + rules_path + "'")
return
# search all xml files which are stored in the https rule directory
xml_files = [join(rules_path, f)
for f in listdir(rules_path)
if isfile(join(rules_path, f)) and f[-4:] == '.xml']
# load xml-files
for ruleset_file in xml_files:
# calculate rewrite-rules
ruleset = load_single_https_ruleset(ruleset_file)
# skip if no ruleset returned
if not ruleset:
continue
# append ruleset
https_rules.append(ruleset)
logger.info('{n} rules loaded'.format(n=len(https_rules)))
def https_url_rewrite(result):
skip_https_rewrite = False
# check if HTTPS rewrite is possible
for target, rules, exclusions in https_rules:
# check if target regex match with url
if target.match(result['parsed_url'].netloc):
# process exclusions
for exclusion in exclusions:
# check if exclusion match with url
if exclusion.match(result['url']):
skip_https_rewrite = True
break
# skip https rewrite if required
if skip_https_rewrite:
break
# process rules
for rule in rules:
try:
new_result_url = rule[0].sub(rule[1], result['url'])
except:
break
# parse new url
new_parsed_url = urlparse(new_result_url)
# continue if nothing was rewritten
if result['url'] == new_result_url:
continue
# get domainname from result
# TODO: only works correctly for TLDs like
# asdf.com, not for asdf.com.de
# TODO: use publicsuffix instead of this rewrite rule
old_result_domainname = '.'.join(
result['parsed_url'].hostname.split('.')[-2:])
new_result_domainname = '.'.join(
new_parsed_url.hostname.split('.')[-2:])
# check if rewritten hostname is the same,
# to protect against wrong or malicious rewrite rules
if old_result_domainname == new_result_domainname:
# set new url
result['url'] = new_result_url
# target has matched, do not search over the other rules
break
return result
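A compact sketch of what one loaded ruleset boils down to and how https_url_rewrite() applies it: a compiled host regex plus (from, to) pairs fed to re.sub (example.com stands in for a real ruleset target):
# sketch: the shape of a compiled ruleset and a single rewrite step
import re

target_hosts = re.compile(r'^(example\.com|.*\.example\.com)', re.I | re.U)
rules = [(re.compile(r'^http://(www\.)?example\.com/', re.I | re.U),
          r'https://\1example.com/')]
exclusions = []

url = 'http://www.example.com/page'
if target_hosts.match('www.example.com'):
    for rule_from, rule_to in rules:
        print(rule_from.sub(rule_to, url))  # https://www.example.com/page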

View File

@@ -1,17 +0,0 @@
<!--
This directory contains web site rewriting rules for the
HTTPS Everywhere software, available from
https://www.eff.org/https-everywhere
These rules were contributed to the project by users and aim to
enable routine secure access to as many different web sites as
possible. They are automatically installed together with the
HTTPS Everywhere software. The presence of these rules does not
mean that an HTTPS Everywhere user accessed, or intended to
access, any particular web site.
For information about how to create additional HTTPS Everywhere
rewriting rules to add support for new sites, please see
https://www.eff.org/https-everywhere/rulesets
-->

View File

@@ -1,56 +0,0 @@
<!--
For other Microsoft coverage, see Microsoft.xml.
CDN buckets:
- a134.lm.akamai.net
- akam.bing.com
- *.mm.bing.net
Nonfunctional domains:
- m2.cn.bing.com
- origin.bj1.bing.com
- blogs.bing.com
Fully covered domains:
- bing.com subdomains:
- (www.)
- c.bing (tracking beacons)
- cn.bing
- h.bing
- ssl
- testfamilysafety.bing
- udc.bing
- (www.)bing
- *.mm.bing.net
- api.bing.com
-->
<ruleset name="Bing">
<target host="bing.com" />
<target host="*.bing.com" />
<target host="*.mm.bing.net" />
<securecookie host=".*\.bing\.com$" name=".+" />
<rule from="^http://((?:c|cn|h|ssl|testfamilysafety|udc|www)\.)?bing\.com/"
to="https://$1bing.com/" />
<rule from="^http://([^/:@]*)\.mm\.bing\.net/"
to="https://$1.mm.bing.com/"/>
<rule from="^http://([^/:@]*)\.api\.bing\.net/"
to="https://$1.api.bing.com/"/>
</ruleset>

View File

@@ -1,69 +0,0 @@
<!--
Nonfunctional domains:
- blog.dailymotion.com
- press.dailymotion.com (shows steaw.com, CN: www.steaw.com)
- proxy-46.dailymotion.com
- publicite.dailymotion.com
- publisher.dailymotion.com (reset)
- vid.ak.dmcdn.net (403, Akamai)
- vid2.ak.dmcdn.net (504, akamai)
Problematic domains:
- ak2.static.dailymotion.com (mismatched, CN: *.dmcdn.net)
- support.dmcloud.net (mismatched, CN: *.zendesk.com)
Partially covered domains:
- (www.)dailymotion.com
- cdn/manifest/video/\w+.mnft 403s
- crossdomain.xml breaks videos
-->
<ruleset name="Dailymotion (default off)" default_off="breaks some embedded videos">
<target host="dailymotion.com" />
<!--
* for cross-domain cookie.
-->
<target host="*.dailymotion.com" />
<!--
https://mail1.eff.org/pipermail/https-everywhere-rules/2012-July/001241.html
-->
<exclusion pattern="^http://(?:www\.)?dailymotion\.com/(?:cdn/[\w-]+/video/|crossdomain\.xml$)" />
<target host="ak2.static.dailymotion.com" />
<target host="*.dmcdn.net" />
<target host="dmcloud.net" />
<target host="*.dmcloud.net" />
<!-- Testing wrt embedded breakage.
securecookie host="^.*\.dailymotion\.com$" name=".+" /-->
<!--
Omniture tracking cookies:
-->
<securecookie host="^\.dailymotion\.com$" name="^s_\w+$" />
<securecookie host="^www\.dailymotion\.com$" name=".+" />
<rule from="^http://(erroracct\.|www\.)?dailymotion\.com/"
to="https://$1dailymotion.com/" />
<rule from="^http://(s\d|static(?:\d|s\d-ssl))\.dmcdn\.net/"
to="https://$1.dmcdn.net/" />
<rule from="^https?://ak2\.static\.dailymotion\.com/"
to="https://static1-ssl.dmcdn.net/" />
<rule from="^http://(s\.|www\.)?dmcloud\.net/"
to="https://$1dmcloud.net/" />
<rule from="^https?://support\.dmcloud\.net/"
to="https://dmcloud.zendesk.com/" />
</ruleset>

View File

@ -1,53 +0,0 @@
<!--
For problematic rules, see Deviantart-mismatches.xml.
Other deviantArt rulesets:
- Sta.sh.xml
ToDo: Find edgecast URL for /(fc|th)\d+.
Mixed content:
- Images on *.....com from e.deviantart.net *
* Secured by us
-->
<ruleset name="DeviantArt (pending)" default_off="site operator says not ready yet">
<target host="deviantart.com" />
<target host="*.deviantart.com" />
<target host="deviantart.net" />
<target host="*.deviantart.net" />
<!-- Not secured by server:
-->
<!--securecookie host="^\.deviantart\.com$" name="^userinfo$" /-->
<securecookie host="^\.deviantart\.com$" name=".*" />
<!-- Redirects from com to net, but does so successfully by itself.
-->
<rule from="^http://([aei]|fc\d\d|s[ht]|th\d\d)\.deviantart\.(com|net)/"
to="https://$1.deviantart.$2/" />
<!-- This handles everything that isn't in the first rule.
Namely, usernames, backend, fc, th, and (www.).
These domains present a cert that is only
valid for .com.
Note that .net isn't used on DA, but .net does
redirect to .com, and we shouldn't break what would
otherwise work.
Mustn't rewrite from https here, as doing so
would conflict with the first rule.
-->
<rule from="^http://([^/:@\.]+\.)?deviantart\.(?:com|net)/"
to="https://$1deviantart.com/" />
</ruleset>

View File

@ -1,38 +0,0 @@
<!--
Problematic domains:
- www.dukgo.com (mismatched, CN: dukgo.com)
Fully covered domains:
- (www.)dukgo.com (www → ^)
-->
<ruleset name="DuckDuckGo">
<target host="duckduckgo.com" />
<target host="*.duckduckgo.com" />
<target host="ddg.gg" />
<target host="duck.co" />
<target host="i.duck.co" />
<target host="dukgo.com" />
<target host="www.dukgo.com" />
<exclusion pattern="^http://(help|meme)\.duckduckgo\.com/" />
<securecookie host="^duck\.co$" name=".*"/>
<rule from="^http://duckduckgo\.com/" to="https://duckduckgo.com/"/>
<rule from="^http://([^/:@\.]+)\.duckduckgo\.com/" to="https://$1.duckduckgo.com/"/>
<!-- TODO: What does ddg.gg/foo do? Runs query foo, redirects to homepage, or error? -->
<rule from="^http://ddg\.gg/$" to="https://duckduckgo.com/" />
<rule from="^http://duck\.co/" to="https://duck.co/" />
<rule from="^http://i\.duck\.co/"
to="https://duckduckgo.com/"/>
<rule from="^http://(?:www\.)?dukgo\.com/"
to="https://dukgo.com/" />
</ruleset>

View File

@ -1,44 +0,0 @@
<!--
For other Yahoo coverage, see Yahoo.xml.
These altnames don't exist:
- www.blog.flickr.net
- www.code.flickr.net
-->
<ruleset name="Flickr">
<target host="flic.kr" />
<target host="*.flic.kr" />
<target host="flickr.com" />
<target host="*.flickr.com" />
<target host="*.flickr.net" />
<target host="*.staticflickr.com" />
<!-- Not secured by server:
-->
<!--securecookie host="^\.flic\.kr$" name="^BX$" /-->
<securecookie host="^\.flic\.kr$" name=".+" />
<securecookie host=".*\.flickr\.com$" name=".+" />
<rule from="^http://flic\.kr/"
to="https://flic.kr/" />
<rule from="^http://(api\.|www\.)?flickr\.com/"
to="https://$1flickr.com/" />
<rule from="^http://s(ecure|tatic)\.flickr\.com/"
to="https://s$1.flickr.com/" />
<rule from="^http://(c2|farm\d+)\.static(\.)?flickr\.com/"
to="https://$1.static$2flickr.com/" />
<rule from="^http://(blog|code)\.flickr\.net/"
to="https://$1.flickr.net/" />
</ruleset>

View File

@ -1,11 +0,0 @@
<!--
For other GitHub coverage, see Github.xml.
-->
<ruleset name="GitHub Pages">
<target host="*.github.io" />
<rule from="^http://([^/@:\.]+)\.github\.io/"
to="https://$1.github.io/" />
</ruleset>

View File

@ -1,94 +0,0 @@
<!--
Other GitHub rulesets:
- Github-Pages.xml
- Guag.es.xml
- Speaker_Deck.com.xml
CDN buckets:
- github-images.s3.amazonaws.com
- github.global.ssl.fastly.net
- a248.e.akamai.net/assets.github.com/
- a248.e.akamai.net/camo.github.com/
- s3.amazonaws.com/github/ | d24z2fz21y4fag.cloudfront.net
- github.myshopify.com
Fully covered domains:
- github.com subdomains:
- (www.)
- assets\d+
- assets-cdn
- bounty
- cloud
- f.cloud
- codeload
- developer
- eclipse
- enterprise
- gist
- gist-assets
- help
- identicons
- jobs
- mac
- mobile
- nodeload
- octodex
- pages
- raw
- rg3
- shop
- status
- support
- training
- try
- wiki
- windows
- collector.githubapp.com
- githubusercontent.com
-->
<ruleset name="GitHub">
<target host="github.com" />
<target host="*.github.com" />
<target host="github.io" />
<target host="*.githubusercontent.com" />
<target host="collector.githubapp.com" />
<!-- Secured by server:
-->
<!--securecookie host="^github\.com$" name="^(_gh_sess|tz|user_session)$" /-->
<!--securecookie host="^\.github\.com$" name="^(dotcom_user|logged_in)$" /-->
<!--securecookie host="^enterprise\.github\.com$" name="^(_enterprise_web|request_method)$" /-->
<!--securecookie host="^gist\.github\.com$" name="^_gist_session$" /-->
<!--securecookie host="^help\.github\.com$" name="^_help_session$" /-->
<!--
Not secured by server:
-->
<!--securecookie host="^status\.github\.com$" name="^rack\.session$" /-->
<securecookie host="^(?:.*\.)?github\.com$" name=".+" />
<rule from="^http://((?:assets\d+|assets-cdn|bounty|cloud|f\.cloud|codeload|developer|eclipse|enterprise|gist|gist-assets|help|identicons|jobs|mac|mobile|nodeload|octodex|pages|raw|rg3|shop|status|support|training|try|wiki|windows|www)\.)?github\.com/"
to="https://$1github.com/" />
<rule from="^http://collector\.githubapp\.com/"
to="https://collector.githubapp.com/" />
<rule from="^https?://github\.io/"
to="https://pages.github.com/" />
<rule from="^http://([^/@:\.]+)\.githubusercontent\.com/"
to="https://$1.githubusercontent.com/" />
</ruleset>

View File

@ -1,26 +0,0 @@
<!--
Problematic domains:
- (www.)apture.com (works, mismatched, CN: *.google.com)
-->
<ruleset name="Google (mismatches)" default_off="mismatches">
<!-- Akamai -->
<target host="js.admeld.com"/>
<target host="apture.com" />
<target host="www.apture.com" />
<target host="googleartproject.com"/>
<target host="www.googleartproject.com"/>
<rule from="^http://js\.admeld\.com/"
to="https://js.admeld.com/"/>
<rule from="^https?://(?:www\.)?apture\.com/"
to="https://apture.com/" />
<rule from="^http://(?:www\.)?googleartproject\.com/"
to="https://www.googleartproject.com/"/>
</ruleset>

View File

@ -1,14 +0,0 @@
<!--
For other Google coverage, see GoogleServices.xml.
-->
<ruleset name="Google.org">
<target host="google.org" />
<target host="www.google.org" />
<rule from="^http://(www\.)?google\.org/"
to="https://$1google.org/" />
</ruleset>

View File

@ -1,143 +0,0 @@
<!--
For other Google coverage, see GoogleServices.xml.
Nonfunctional domains:
- hosted.gmodules.com *
- img0.gmodules.com *
- p.gmodules.com *
* 404; mismatched, CN: *.googleusercontent.com
Problematic domains:
- gmodules.com (503, CN: www.google.com)
- www.gmodules.com (503, CN: *.googleusercontent.com)
- gstatic.com (404, valid cert)
- api.recaptcha.net (works; mismatched, CN: google.com)
Partially covered domains:
- (www.)gmodules.com (→ www.google.com)
- (www.)google.com
- chart.apis.google.com (→ chart.googleapis.com)
Fully covered domains:
- api.google.com
- *.clients.google.com:
- linkhelp
- ssl.google-analytics.com
- www.google-analytics.com
- googleapis.com subdomains:
- ajax
- chart
- *.commondatastorage
- fonts
- *.storage
- www
- gstatic.com subdomains:
- (www.) (^ → www)
- csi
- encrypted-tbn\d
- g0
- *.metric
- ssl
- t\d
- api.recaptcha.net (→ www.google.com)
- api-secure.recaptcha.net
- gdata.youtube.com
ssl.google-analytics.com/ga.js sets __utm\w wildcard
cookies on whichever domain it is loaded from.
-->
<ruleset name="Google APIs">
<target host="gmodules.com" />
<target host="www.gmodules.com" />
<target host="google.com" />
<target host="apis.google.com" />
<target host="*.apis.google.com" />
<target host="*.clients.google.com" />
<target host="www.google.com" />
<target host="*.google-analytics.com" />
<target host="*.googleapis.com" />
<target host="gstatic.com" />
<target host="*.gstatic.com" />
<!-- Captive portal detection redirects to this URL, and many captive
portals break TLS, so exempt this redirect URL.
See GitHub bug #368
-->
<exclusion pattern="^http://www\.gstatic\.com/generate_204" />
<target host="*.recaptcha.net" />
<target host="gdata.youtube.com" />
<exclusion pattern="^http://gdata\.youtube\.com/crossdomain\.xml" />
<securecookie host="^ssl\.google-analytics\.com$" name=".+" />
<rule from="^http://(?:www\.)?gmodules\.com/ig/images/"
to="https://www.google.com/ig/images/" />
<!-- jsapi was causing problems on some sites that embed google maps:
https://trac.torproject.org/projects/tor/ticket/2335
Apparently now fixed; thanks, Google!
-->
<rule from="^http://(?:www\.)?google\.com/(afsonline/|chart|jsapi|recaptcha/|uds)"
to="https://www.google.com/$1" />
<rule from="^http://(api|[\w-]+\.client)s\.google\.com/"
to="https://$1s.google.com/" />
<rule from="^http://chart\.apis\.google\.com/chart"
to="https://chart.googleapis.com/chart" />
<rule from="^http://(ssl|www)\.google-analytics\.com/"
to="https://$1.google-analytics.com/" />
<rule from="^http://(ajax|chart|fonts|www)\.googleapis\.com/"
to="https://$1.googleapis.com/" />
<rule from="^http://([^@:\./]+\.)?(commondata)?storage\.googleapis\.com/"
to="https://$1$2storage.googleapis.com/" />
<!-- There is an interesting question about whether we should
append &strip=1 to all cache URLs. This causes them to load
without images and styles, which is more secure but can look
worse.
Without &strip=1, the images and styles from the cached
pages still load from the original, typically unencrypted, page.
With &strip=1, the cached page will be text-only and
will come exclusively from Google's HTTPS server.
-->
<rule from="^http://(?:www\.)?gstatic\.com/"
to="https://www.gstatic.com/" />
<rule from="^http://(csi|encrypted-tbn\d|g0|[\w-]+\.metric|ssl|t\d)\.gstatic\.com/"
to="https://$1.gstatic.com/" />
<rule from="^http://api\.recaptcha\.net/"
to="https://www.google.com/recaptcha/api/" />
<rule from="^http://api-secure\.recaptcha\.net/"
to="https://api-secure.recaptcha.net/" />
<rule from="^http://gdata\.youtube\.com/"
to="https://gdata.youtube.com/" />
</ruleset>

View File

@ -1,6 +0,0 @@
<ruleset name="GoogleCanada">
<target host="google.ca" />
<target host="*.google.ca" />
<rule from="^http://([^/:@\.]+)\.google\.ca/finance" to="https://$1.google.ca/finance"/>
</ruleset>

View File

@ -1,65 +0,0 @@
<!--
For other Google coverage, see GoogleServices.xml.
Problematic domains:
- www.google.bo *
- www.google.co *
- www.google.ec *
- www.google.in *
- www.google.kr *
- www.google.com.kz **
- www.google.com.lk *
- www.google.mx **
- www.google.sg *
- www.google.sl *
- www.google.ug *
- www.google.vn *
* 404; mismatched, CN: google.com
** Works; mismatched, CN: google.com
-->
<ruleset name="Google Images">
<target host="google.*" />
<target host="www.google.*" />
<target host="google.co.*" />
<target host="www.google.co.*" />
<target host="google.com" />
<target host="images.google.com" />
<target host="google.com.*" />
<target host="www.google.com.*" />
<!--
Only handle image-related paths in this ruleset:
-->
<exclusion pattern="^http://(?:www\.)?google(?:\.com?)?\.\w{2,3}/(?!(?:advanced_image_search|imghp|.*tb(?:m=isch|s=sbi)))" />
<rule from="^http://(?:www\.)?google\.com/"
to="https://www.google.com/" />
<rule from="^http://images\.google\.com/"
to="https://images.google.com/" />
<!-- First handle problematic domains:
-->
<rule from="^http://(?:www\.)?google\.co/"
to="https://www.google.com/" />
<rule from="^http://(?:www\.)?google\.(?:co\.)?(in|kr|ug)/"
to="https://www.google.co.$1/" />
<rule from="^http://(?:www\.)?google\.(?:com\.)?(kz|lk)/"
to="https://www.google.$1/" />
<rule from="^http://(?:www\.)?google\.(?:com\.)?(bo|ec|mx|sg|sl|vn)/"
to="https://www.google.com.$1/" />
<!-- And then the rest:
-->
<rule from="^http://(?:www\.)?google\.(com?\.)?(ae|ar|at|au|bg|bh|br|ca|ch|cl|co|cr|cu|de|eg|es|fi|fr|gh|gt|hr|id|ie|il|it|jo|jp|jm|ke|kw|lb|ly|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|se|sv|th|tr|uk|uy|ve|za|zw)/"
to="https://www.google.$1$2/" />
</ruleset>

View File

@ -1,78 +0,0 @@
<ruleset name="Search www.google.com">
<!--
Enabling this ruleset should cause searches to go to
https://www.google.com rather than https://encrypted.google.com. Note that
the filename is important; it must be before GoogleSearch.xml in a bash
expansion of src/chrome/content/rules/*.xml in order to take precedence.
-->
<target host="*.google.com" />
<target host="google.com" />
<target host="www.google.com.*" />
<target host="google.com.*" />
<target host="www.google.co.*" />
<target host="google.co.*" />
<target host="www.google.*" />
<target host="google.*" />
<!-- beyond clients1 these do not currently exist in the ccTLDs,
but just in case... -->
<target host="clients1.google.com.*" />
<target host="clients2.google.com.*" />
<target host="clients3.google.com.*" />
<target host="clients4.google.com.*" />
<target host="clients5.google.com.*" />
<target host="clients6.google.com.*" />
<target host="clients1.google.co.*" />
<target host="clients2.google.co.*" />
<target host="clients3.google.co.*" />
<target host="clients4.google.co.*" />
<target host="clients5.google.co.*" />
<target host="clients6.google.co.*" />
<target host="clients1.google.*" />
<target host="clients2.google.*" />
<target host="clients3.google.*" />
<target host="clients4.google.*" />
<target host="clients5.google.*" />
<target host="clients6.google.*" />
<rule from="^http://www\.google\.com/$"
to="https://www.google.com/"/>
<!-- The most basic case. -->
<rule from="^http://(?:www\.)?google\.com/search"
to="https://www.google.com/search"/>
<!-- A very annoying exception that we seem to need for the basic case -->
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" />
<exclusion pattern="^http://clients[0-9]\.google\.com/.*client=products.*" />
<exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" />
<!-- https://trac.torproject.org/projects/tor/ticket/9713 -->
<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" />
<!-- This is necessary for image results links from web search results -->
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" />
<rule from="^http://(?:www\.)?google\.com/webhp"
to="https://www.google.com/webhp"/>
<rule from="^http://(?:www\.)?google\.com/#"
to="https://www.google.com/#"/>
<rule from="^http://(?:www\.)?google\.com/$"
to="https://www.google.com/"/>
<!-- Completion urls look like this:
http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n
-->
<rule from="^http://clients[0-9]\.google\.com/complete/search"
to="https://clients1.google.com/complete/search"/>
</ruleset>

View File

@ -1,67 +0,0 @@
<!--
Problematic domains:
- khms *
- khms[0-3] *
* $ 404s
Fully covered domains:
- google.com subdomains:
- khms
- khms[0-3]
-->
<ruleset name="Google Maps">
<target host="maps.google.*" />
<!--
https://trac.torproject.org/projects/tor/ticket/8627
-->
<exclusion pattern="^http://maps\.google\.com/local_url" />
<exclusion pattern="^http://maps\.google\.gr/transitathens" />
<target host="maps.google.co.*" />
<target host="khms.google.com" />
<target host="khms0.google.com" />
<target host="khms1.google.com" />
<target host="khms2.google.com" />
<target host="khms3.google.com" />
<target host="maps-api-ssl.google.com" />
<target host="mw2.google.com" />
<target host="maps.google.com.*" />
<target host="maps.googleapis.com" />
<!--
https://mail1.eff.org/pipermail/https-everywhere-rules/2012-September/001317.html
-->
<!--exclusion pattern="^http://maps\.googleapis\.com/map(files/lib/map_1_20\.swf|sapi/publicapi\?file=flashapi)" /-->
<exclusion pattern="^http://maps\.googleapis\.com/map(?:files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" />
<target host="maps.gstatic.com" />
<!--securecookie host="^maps\.google\.(com?\.)?(au|ca|gh|ie|in|jm|ke|lk|my|n[agz]|pk|rw|sl|sg|ug|uk|za|zw)$" name=".+" /-->
<securecookie host="^maps\.google\.[\w.]{2,6}$" name=".+" />
<securecookie host="^maps\.g(?:oogle|oogleapis|static)\.com$" name=".+" />
<securecookie host="^maps-api-ssl\.google\.com$" name=".+" />
<rule from="^http://maps\.google\.([^/]+)/"
to="https://maps.google.$1/" />
<!-- http://khms.../$ 404s:
-->
<rule from="^http://khms\d?\.google\.com/+\??$"
to="https://www.google.com/" />
<rule from="^http://(khms\d?|maps-api-ssl|mw2)\.google\.com/"
to="https://$1.google.com/" />
<rule from="^http://maps\.g(oogleapis|static)\.com/"
to="https://maps.g$1.com/" />
<rule from="^https://maps\.googleapis\.com/map(?=files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)"
to="http://maps.googleapis.com/map" downgrade="1" />
</ruleset>

View File

@ -1,6 +0,0 @@
<ruleset name="GoogleMelange">
<target host="www.google-melange.com" />
<target host="google-melange.com" />
<rule from="^http://(www\.)?google-melange\.com/" to="https://www.google-melange.com/" />
</ruleset>

View File

@ -1,135 +0,0 @@
<ruleset name="Google Search">
<target host="google.com" />
<target host="*.google.com" />
<target host="google.com.*" />
<target host="www.google.com.*" />
<target host="google.co.*" />
<target host="www.google.co.*" />
<target host="google.*" />
<target host="www.google.*" />
<!--
Beyond clients1 these do not currently
exist in the ccTLDs, but just in case...
-->
<target host="clients1.google.com.*" />
<target host="clients2.google.com.*" />
<target host="clients3.google.com.*" />
<target host="clients4.google.com.*" />
<target host="clients5.google.com.*" />
<target host="clients6.google.com.*" />
<target host="clients1.google.co.*" />
<target host="clients2.google.co.*" />
<target host="clients3.google.co.*" />
<target host="clients4.google.co.*" />
<target host="clients5.google.co.*" />
<target host="clients6.google.co.*" />
<target host="clients1.google.*" />
<target host="clients2.google.*" />
<target host="clients3.google.*" />
<target host="clients4.google.*" />
<target host="clients5.google.*" />
<target host="clients6.google.*" />
<!-- Some Google pages can generate naive links back to the
unencrypted version of encrypted.google.com, which is
a 301 but theoretically vulnerable to SSL stripping.
-->
<rule from="^http://encrypted\.google\.com/"
to="https://encrypted.google.com/" />
<!-- The most basic case.
-->
<rule from="^http://(?:www\.)?google\.com/search"
to="https://encrypted.google.com/search" />
<!-- A very annoying exception that we
seem to need for the basic case
-->
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" />
<exclusion pattern="^http://clients\d\.google\.com/.*client=products.*" />
<exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" />
<!-- https://trac.torproject.org/projects/tor/ticket/9713
-->
<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" />
<!-- This is necessary for image results
links from web search results
-->
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" />
<rule from="^http://(?:www\.)?google\.com/about"
to="https://www.google.com/about" />
<!-- There are two distinct cases for these firefox searches -->
<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox/?$"
to="https://encrypted.google.com/" />
<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox"
to="https://encrypted.google.com/webhp" />
<rule from="^http://(?:www\.)?google\.com/webhp"
to="https://encrypted.google.com/webhp" />
<rule from="^http://codesearch\.google\.com/"
to="https://codesearch.google.com/" />
<rule from="^http://(?:www\.)?google\.com/codesearch"
to="https://www.google.com/codesearch" />
<rule from="^http://(?:www\.)?google\.com/#"
to="https://encrypted.google.com/#" />
<rule from="^http://(?:www\.)?google\.com/$"
to="https://encrypted.google.com/" />
<!-- Google supports IPv6 search, including
HTTPS with a valid certificate! -->
<rule from="^http://ipv6\.google\.com/"
to="https://ipv6.google.com/" />
<!-- most google international sites look like
"google.fr", some look like "google.co.jp",
and some crazy ones like "google.com.au" -->
<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/(search\?|#)"
to="https://$1google$2.$3/$4" />
<!-- Language preference setting -->
<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/setprefs"
to="https://$1google$2.$3/setprefs" />
<!-- Completion urls look like this:
http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n
-->
<rule from="^http://clients\d\.google\.com/complete/search"
to="https://clients1.google.com/complete/search" />
<rule from="^http://clients\d\.google(\.com?\.[a-z]{2})/complete/search"
to="https://clients1.google.$1/complete/search" />
<rule from="^http://clients\d\.google\.([a-z]{2})/complete/search"
to="https://clients1.google.$1/complete/search" />
<rule from="^http://suggestqueries\.google\.com/complete/search"
to="https://clients1.google.com/complete/search" />
<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?$"
to="https://$1google.$2$3/" />
<!-- If there are URL parameters, keep them. -->
<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?\?"
to="https://$1google.$2$3/webhp?" />
<!-- teapot -->
<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/teapot"
to="https://$1google$2.$3/teapot" />
</ruleset>
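As the comments in this ruleset note, exclusion patterns carve out URLs that must not be rewritten (shopping results, OCSP requests, image-result links), and an exclusion match takes precedence over every rule in the ruleset. A hedged sketch of that precedence check; the pattern list is an illustrative subset, not the full set above:

import re

# illustrative subset of the exclusions above
exclusions = [
    r"^http://(?:www\.)?google\.com/search.*tbs=shop",
    r"^http://clients[0-9]\.google\.com/ocsp",
]

def should_rewrite(url):
    # any exclusion match wins over all rules in the ruleset
    return not any(re.search(pattern, url) for pattern in exclusions)

print(should_rewrite('http://www.google.com/search?q=searx'))        # True
print(should_rewrite('http://www.google.com/search?q=tv&tbs=shop'))  # False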

View File

@ -1,345 +0,0 @@
<!--
Other Google rulesets:
- 2mdn.net.xml
- Admeld.xml
- ChannelIntelligence.com.xml
- Doubleclick.net.xml
- FeedBurner.xml
- Google.org.xml
- GoogleAPIs.xml
- Google_App_Engine.xml
- GoogleImages.xml
- GoogleShopping.xml
- Ingress.xml
- Meebo.xml
- Orkut.xml
- Postini.xml
- WebM_Project.org.xml
Nonfunctional domains:
- feedproxy.google.com (404, valid cert)
- partnerpage.google.com *
- safebrowsing.clients.google.com (404, mismatched)
- (www.)googlesyndicatedsearch.com (404; mismatched, CN: google.com)
- buttons.googlesyndication.com *
* 404, valid cert
Nonfunctional google.com paths:
- analytics (redirects to http)
- imgres
- gadgets *
- hangouts (404)
- u/ (404)
* Redirects to http
Problematic domains:
- www.goo.gl (404; mismatched, CN: *.google.com)
- google.com subdomains:
- books (googlebooks/, images/, & intl/ 404, but works when rewritten to www)
- cbks0 ****
- earth *
- gg ($ 404s)
- knoll *
- scholar **
- trends *
- news.google.cctld **
- scholar.google.cctld **
- *-opensocial.googleusercontent.com ***
**** $ 404s
* 404, valid cert
** Redirects to http, valid cert
*** Breaks followers widget - https://trac.torproject.org/projects/tor/ticket/7294
Partially covered domains:
- google.cctld subdomains:
- scholar (→ www)
- google.com subdomains:
- (www.)
- cbks0 ($ 404s)
- gg ($ 404s)
- news (→ www)
- scholar (→ www)
- *.googleusercontent.com (*-opensocial excluded)
Fully covered domains:
- lh[3-6].ggpht.com
- (www.)goo.gl (www → ^)
- google.com subdomains:
- accounts
- adwords
- apis
- appengine
- books (→ encrypted)
- calendar
- checkout
- chrome
- clients[12]
- code
- *.corp
- developers
- dl
- docs
- docs\d
- \d.docs
- drive
- earth (→ www)
- encrypted
- encrypted-tbn[123]
- feedburner
- fiber
- finance
- glass
- groups
- health
- helpouts
- history
- hostedtalkgadget
- id
- investor
- knol
- knoll (→ knol)
- lh\d
- mail
- chatenabled.mail
- pack
- picasaweb
- pki
- play
- plus
- plusone
- productforums
- profiles
- safebrowsing-cache
- cert-test.sandbox
- plus.sandbox
- sb-ssl
- script
- security
- services
- servicessites
- sites
- spreadsheets
- spreadsheets\d
- support
- talk
- talkgadget
- tbn2 (→ encrypted-tbn2)
- tools
- trends (→ www)
- partner.googleadservices.com
- (www.)googlecode.com
- *.googlecode.com (per-project subdomains)
- googlesource.com
- *.googlesource.com
- pagead2.googlesyndication.com
- tpc.googlesyndication.com
- mail-attachment.googleusercontent.com
- webcache.googleusercontent.com
XXX: Needs more testing
-->
<ruleset name="Google Services">
<target host="*.ggpht.com" />
<target host="gmail.com" />
<target host="www.gmail.com" />
<target host="goo.gl" />
<target host="www.goo.gl" />
<target host="google.*" />
<target host="accounts.google.*" />
<target host="adwords.google.*" />
<target host="finance.google.*" />
<target host="groups.google.*" />
<target host="it.google.*" />
<target host="news.google.*" />
<exclusion pattern="^http://(?:news\.)?google\.com/(?:archivesearch|newspapers)" />
<target host="picasaweb.google.*" />
<target host="scholar.google.*" />
<target host="www.google.*" />
<target host="*.google.ca" />
<target host="google.co.*" />
<target host="accounts.google.co.*" />
<target host="adwords.google.co.*" />
<target host="finance.google.co.*" />
<target host="groups.google.co.*" />
<target host="id.google.co.*" />
<target host="news.google.co.*" />
<target host="picasaweb.google.co.*" />
<target host="scholar.google.co.*" />
<target host="www.google.co.*" />
<target host="google.com" />
<target host="*.google.com" />
<exclusion pattern="^http://(?:www\.)?google\.com/analytics/*(?:/[^/]+)?(?:\?.*)?$" />
<!--exclusion pattern="^http://books\.google\.com/(?!books/(\w+\.js|css/|javascript/)|favicon\.ico|googlebooks/|images/|intl/)" /-->
<exclusion pattern="^http://cbks0\.google\.com/(?:$|\?)" />
<exclusion pattern="^http://gg\.google\.com/(?!csi(?:$|\?))" />
<target host="google.com.*" />
<target host="accounts.google.com.*" />
<target host="adwords.google.com.*" />
<target host="groups.google.com.*" />
<target host="id.google.com.*" />
<target host="news.google.com.*" />
<target host="picasaweb.google.com.*" />
<target host="scholar.google.com.*" />
<target host="www.google.com.*" />
<target host="partner.googleadservices.com" />
<target host="googlecode.com" />
<target host="*.googlecode.com" />
<target host="googlemail.com" />
<target host="www.googlemail.com" />
<target host="googlesource.com" />
<target host="*.googlesource.com" />
<target host="*.googlesyndication.com" />
<target host="www.googletagservices.com" />
<target host="googleusercontent.com" />
<target host="*.googleusercontent.com" />
<!--
Necessary for the Followers widget:
https://trac.torproject.org/projects/tor/ticket/7294
-->
<exclusion pattern="http://[^@:\./]+-opensocial\.googleusercontent\.com" />
<!-- Can we secure any of these wildcard cookies safely?
-->
<!--securecookie host="^\.google\.com$" name="^(hl|I4SUserLocale|NID|PREF|S)$" /-->
<!--securecookie host="^\.google\.[\w.]{2,6}$" name="^(hl|I4SUserLocale|NID|PREF|S|S_awfe)$" /-->
<securecookie host="^(?:accounts|adwords|\.code|login\.corp|developers|docs|\d\.docs|fiber|mail|picasaweb|plus|\.?productforums|support)\.google\.[\w.]{2,6}$" name=".+" />
<securecookie host="^www\.google\.com$" name="^GoogleAccountsLocale_session$" />
<securecookie host="^mail-attachment\.googleusercontent\.com$" name=".+" />
<securecookie host="^gmail\.com$" name=".+" />
<securecookie host="^www\.gmail\.com$" name=".+" />
<securecookie host="^googlemail\.com$" name=".+" />
<securecookie host="^www\.googlemail\.com$" name=".+" />
<!-- - lh 3-6 exist
- All appear identical
- Identical to lh\d.googleusercontent.com
-->
<rule from="^http://lh(\d)\.ggpht\.com/"
to="https://lh$1.ggpht.com/" />
<rule from="^http://lh(\d)\.google\.ca/"
to="https://lh$1.google.ca/" />
<rule from="^http://(www\.)?g(oogle)?mail\.com/"
to="https://$1g$2mail.com/" />
<rule from="^http://(?:www\.)?goo\.gl/"
to="https://goo.gl/" />
<!-- Redirects to http when rewritten to www:
-->
<rule from="^http://books\.google\.com/"
to="https://encrypted.google.com/" />
<!-- tisp$ 404s:
-->
<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/tisp(?=$|\?)"
to="https://www.google.$1/tisp/" />
<!-- Paths that work on all in google.*
-->
<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/(accounts|adplanner|ads|adsense|adwords|analytics|bookmarks|chrome|contacts|coop|cse|css|culturalinstitute|doodles|earth|favicon\.ico|finance|get|goodtoknow|googleblogs|grants|green|hostednews|images|intl|js|landing|logos|mapmaker|newproducts|news|nexus|patents|policies|prdhp|profiles|products|reader|s2|settings|shopping|support|tisp|tools|transparencyreport|trends|urchin|webmasters)(?=$|[?/])"
to="https://www.google.$1/$2" />
<!-- Paths that 404 on .ccltd, but work on .com:
-->
<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/(?=calendar|dictionary|doubleclick|help|ideas|pacman|postini|powermeter|url)"
to="https://www.google.com/" />
<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/custom"
to="https://www.google.com/cse" />
<!-- Paths that only exist/work on .com
-->
<rule from="^http://(?:www\.)?google\.com/(\+|appsstatus|books|buzz|extern_js|glass|googlebooks|ig|insights|moderator|phone|safebrowsing|videotargetting|webfonts)(?=$|[?/])"
to="https://www.google.com/$1" />
<!-- Subdomains that work on all in google.*
-->
<rule from="^http://(accounts|adwords|finance|groups|id|picasaweb|)\.google\.((?:com?\.)?\w{2,3})/"
to="https://$1.google.$2/" />
<!-- Subdomains that only exist/work on .com
-->
<rule from="^http://(apis|appengine|books|calendar|cbks0|chat|checkout|chrome|clients[12]|code|[\w-]+\.corp|developers|dl|docs\d?|\d\.docs|drive|encrypted|encrypted-tbn[123]|feedburner|fiber|fonts|gg|glass||health|helpouts|history|(?:hosted)?talkgadget|investor|lh\d|(?:chatenabled\.)?mail|pack|pki|play|plus(?:\.sandbox)?|plusone|productforums|profiles|safebrowsing-cache|cert-test\.sandbox|sb-ssl|script|security|services|servicessites|sites|spreadsheets\d?|support|talk|tools)\.google\.com/"
to="https://$1.google.com/" />
<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp"/>
<rule from="^http://earth\.google\.com/"
to="https://www.google.com/earth/" />
<rule from="^http://scholar\.google\.((?:com?\.)?\w{2,3})/intl/"
to="https://www.google.$1/intl/" />
<rule from="^http://(?:encrypted-)?tbn2\.google\.com/"
to="https://encrypted-tbn2.google.com/" />
<rule from="^http://knoll?\.google\.com/"
to="https://knol.google.com/" />
<rule from="^http://news\.google\.(?:com?\.)?\w{2,3}/(?:$|news|newshp)"
to="https://www.google.com/news" />
<rule from="^http://trends\.google\.com/"
to="https://www.google.com/trends" />
<rule from="^http://([^/:@\.]+\.)?googlecode\.com/"
to="https://$1googlecode.com/" />
<rule from="^http://([^\./]\.)?googlesource\.com/"
to="https://$1googlesource.com/" />
<rule from="^http://partner\.googleadservices\.com/"
to="https://partner.googleadservices.com/" />
<rule from="^http://(pagead2|tpc)\.googlesyndication\.com/"
to="https://$1.googlesyndication.com/" />
<!-- !www doesn't exist.
-->
<rule from="^http://www\.googletagservices\.com/tag/js/"
to="https://www.googletagservices.com/tag/js/" />
<rule from="^http://([^@:\./]+)\.googleusercontent\.com/"
to="https://$1.googleusercontent.com/" />
</ruleset>

View File

@ -1,28 +0,0 @@
<!--
For other Google coverage, see GoogleServices.xml.
-->
<ruleset name="Google Shopping">
<target host="google.*" />
<target host="www.google.*" />
<target host="google.co.*" />
<target host="www.google.co.*" />
<target host="*.google.com" />
<target host="google.com.*" />
<target host="www.google.com.*" />
<rule from="^http://encrypted\.google\.com/(prdhp|shopping)"
to="https://www.google.com/$1" />
<rule from="^http://shopping\.google\.com/"
to="https://shopping.google.com/" />
<rule from="^http://(?:encrypted|www)\.google\.com/(.*tbm=shop)"
to="https://www.google.com/$1" />
<rule from="^http://(?:www\.)?google\.((?:com?\.)?(?:ae|ar|at|au|bg|bh|bo|br|ca|ch|cl|cr|co|cu|de|ec|eg|es|fi|fr|gh|gt|hr|id|ie|il|in|it|jm|jo|jp|ke|kr|kw|kz|lb|lk|ly|mx|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|sg|sl|se|sv|th|tr|ug|uk|uy|ve|vn|za|zw))/(?=prdhp|shopping)"
to="https://www.google.com/$1" />
</ruleset>

View File

@ -1,7 +0,0 @@
<ruleset name="GoogleSorry">
<target host="sorry.google.com" />
<target host="www.google.com" />
<target host="google.com" />
<rule from="^http://((sorry|www)\.)?google\.com/sorry/" to="https://sorry.google.com/sorry/" />
</ruleset>

View File

@ -1,8 +0,0 @@
<ruleset name="Google Translate (broken)" default_off="redirect loops">
<target host="translate.googleapis.com" />
<target host="translate.google.com" />
<rule from="^http://translate\.googleapis\.com/" to="https://translate.googleapis.com/"/>
<rule from="^http://translate\.google\.com/"
to="https://translate.google.com/" />
</ruleset>

View File

@ -1,83 +0,0 @@
<ruleset name="Google Videos">
<target host="*.google.com" />
<target host="google.com" />
<target host="www.google.com.*" />
<target host="google.com.*" />
<target host="www.google.co.*" />
<target host="google.co.*" />
<target host="www.google.*" />
<target host="google.*" />
<rule from="^http://encrypted\.google\.com/videohp"
to="https://encrypted.google.com/videohp" />
<!-- https://videos.google.com is currently broken; work around that... -->
<rule from="^https?://videos?\.google\.com/$"
to="https://encrypted.google.com/videohp" />
<rule from="^http://(?:www\.)?google\.com/videohp"
to="https://encrypted.google.com/videohp" />
<rule from="^http://(?:images|www|encrypted)\.google\.com/(.*tbm=isch)"
to="https://encrypted.google.com/$1" />
<rule
from="^http://(?:www\.)?google\.(?:com?\.)?(?:au|ca|gh|ie|in|jm|ke|lk|my|na|ng|nz|pk|rw|sl|sg|ug|uk|za|zw)/videohp"
to="https://encrypted.google.com/videohp" />
<rule
from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp$"
to="https://encrypted.google.com/videohp?hl=es" />
<rule
from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp$"
to="https://encrypted.google.com/videohp?hl=ar" />
<rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp$"
to="https://encrypted.google.com/videohp?hl=de" />
<rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp$"
to="https://encrypted.google.com/videohp?hl=$1" />
<rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp$"
to="https://encrypted.google.com/videohp?hl=$1" />
<rule from="^http://(?:www\.)?google\.com\.il/videohp$"
to="https://encrypted.google.com/videohp?hl=he" />
<rule from="^http://(?:www\.)?google\.com\.kr/videohp$"
to="https://encrypted.google.com/videohp?hl=ko" />
<rule from="^http://(?:www\.)?google\.com\.kz/videohp$"
to="https://encrypted.google.com/videohp?hl=kk" />
<rule from="^http://(?:www\.)?google\.com\.jp/videohp$"
to="https://encrypted.google.com/videohp?hl=ja" />
<rule from="^http://(?:www\.)?google\.com\.vn/videohp$"
to="https://encrypted.google.com/videohp?hl=vi" />
<rule from="^http://(?:www\.)?google\.com\.br/videohp$"
to="https://encrypted.google.com/videohp?hl=pt-BR" />
<rule from="^http://(?:www\.)?google\.se/videohp$"
to="https://encrypted.google.com/videohp?hl=sv" />
<!-- If there are URL parameters, keep them. -->
<rule
from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp\?"
to="https://encrypted.google.com/videohp?hl=es&#38;" />
<rule
from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp\?"
to="https://encrypted.google.com/videohp?hl=ar&#38;" />
<rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp\?"
to="https://encrypted.google.com/videohp?hl=de&#38;" />
<rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp\?"
to="https://encrypted.google.com/videohp?hl=$1&#38;" />
<rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp\?"
to="https://encrypted.google.com/videohp?hl=$1&#38;" />
<rule from="^http://(?:www\.)?google\.com\.il/videohp\?"
to="https://encrypted.google.com/videohp?hl=he&#38;" />
<rule from="^http://(?:www\.)?google\.com\.kr/videohp\?"
to="https://encrypted.google.com/videohp?hl=ko&#38;" />
<rule from="^http://(?:www\.)?google\.com\.kz/videohp\?"
to="https://encrypted.google.com/videohp?hl=kk&#38;" />
<rule from="^http://(?:www\.)?google\.com\.jp/videohp\?"
to="https://encrypted.google.com/videohp?hl=ja&#38;" />
<rule from="^http://(?:www\.)?google\.com\.vn/videohp\?"
to="https://encrypted.google.com/videohp?hl=vi&#38;" />
<rule from="^http://(?:www\.)?google\.com\.br/videohp\?"
to="https://encrypted.google.com/videohp?hl=pt-BR&#38;" />
<rule from="^http://(?:www\.)?google\.se/videohp\?"
to="https://encrypted.google.com/videohp?hl=sv&#38;" />
<rule from="^http://video\.google\.com/ThumbnailServer2"
to="https://video.google.com/ThumbnailServer2" />
</ruleset>

View File

@ -1,17 +0,0 @@
<!--
gwbhrd.appspot.com
-->
<ruleset name="GoogleWatchBlog">
<target host="googlewatchblog.de" />
<target host="*.googlewatchblog.de" />
<securecookie host="^(?:www)?\.googlewatchblog\.de$" name=".+" />
<rule from="^http://(static\.|www\.)?googlewatchblog\.de/"
to="https://$1googlewatchblog.de/" />
</ruleset>

View File

@ -1,21 +0,0 @@
<!--
For other Google coverage, see GoogleServices.xml.
-->
<ruleset name="Google App Engine">
<target host="appspot.com" />
<target host="*.appspot.com" />
<!--
Redirects to http for some reason.
-->
<exclusion pattern="^http://photomunchers\.appspot\.com/" />
<securecookie host="^.+\.appspot\.com$" name=".+" />
<rule from="^http://([^@:\./]+\.)?appspot\.com/"
to="https://$1appspot.com/" />
</ruleset>

View File

@ -1,16 +0,0 @@
<!-- This rule was automatically generated based on an HSTS
preload rule in the Chromium browser. See
https://src.chromium.org/viewvc/chrome/trunk/src/net/base/transport_security_state.cc
for the list of preloads. Sites are added to the Chromium HSTS
preload list on request from their administrators, so HTTPS should
work properly everywhere on this site.
Because Chromium and derived browsers automatically force HTTPS for
every access to this site, this rule applies only to Firefox. -->
<ruleset name="Googleplex.com (default off)" platform="firefox" default_off="Certificate error">
<target host="googleplex.com" />
<securecookie host="^googleplex\.com$" name=".+" />
<rule from="^http://googleplex\.com/" to="https://googleplex.com/" />
</ruleset>

View File

@ -1,15 +0,0 @@
<ruleset name="OpenStreetMap">
<target host="openstreetmap.org"/>
<target host="*.openstreetmap.org"/>
<rule from="^http://(?:www\.)?openstreetmap\.org/"
to="https://www.openstreetmap.org/"/>
<rule from="^http://tile\.openstreetmap\.org/"
to="https://a.tile.openstreetmap.org/"/>
<rule from="^http://(blog|help|lists|nominatim|piwik|taginfo|[abc]\.tile|trac|wiki)\.openstreetmap\.org/"
to="https://$1.openstreetmap.org/"/>
</ruleset>

View File

@ -1,14 +0,0 @@
<!--
www: cert only matches ^rawgithub.com
-->
<ruleset name="rawgithub.com">
<target host="rawgithub.com" />
<target host="www.rawgithub.com" />
<rule from="^http://(?:www\.)?rawgithub\.com/"
to="https://rawgithub.com/" />
</ruleset>

View File

@ -1,101 +0,0 @@
<!--
CDN buckets:
- akmedia-a.akamaihd.net
- soundcloud.assistly.com
- help.soundcloud.com
- cs70.wac.edgecastcdn.net
- a1.sndcdn.com
- i1.sndcdn.com
- w1.sndcdn.com
- wpc.658D.edgecastcdn.net
- m-a.sndcdn.com.edgesuite.net
- soundcloud.gettyimages.com
- scbackstage.wpengine.netdna-cdn.com
- ssl doesn't exist
- backstage.soundcloud.com
- soundcloud.wpengine.netdna-cdn.com
- -ssl doesn't exist
- blog.soundcloud.com
- gs1.wpc.v2cdn.netcdn.net
- gs1.wpc.v2cdn.net
- ec-media.soundcloud.com
Nonfunctional soundcloud.com subdomains:
- help (redirects to http, mismatched, CN: *.assistly.com)
- m (redirects to http)
- media
- status (times out)
Problematic domains:
- m-a.sndcdn.com (works, akamai)
Partially covered domains:
- backstage.soundcloud.com
Fully covered domains:
- sndcdn.com subdomains:
- a[12]
- api
- i[1-4]
- w[12]
- wis
- soundcloud.com subdomains:
- (www.)
- api
- blog
- connect
- developers
- ec-media
- eventlogger
- help-assets
- media
- visuals
- w
-->
<ruleset name="Soundcloud (partial)">
<target host="scbackstage.wpengine.netdna-cdn.com" />
<target host="soundcloud.wpengine.netdna-cdn.com" />
<target host="*.sndcdn.com" />
<target host="soundcloud.com" />
<target host="*.soundcloud.com" />
<exclusion pattern="^https?://(?:scbackstage\.wpengine\.netdna-cdn|backstage\.soundcloud)\.com/(?!wp-content/)" />
<rule from="^http://([aiw]\d|api|wis)\.sndcdn\.com/"
to="https://$1.sndcdn.com/" />
<rule from="^http://((?:api|backstage|blog|connect|developers|ec-media|eventlogger|help-assets|media|visuals|w|www)\.|)soundcloud\.com/"
to="https://$1soundcloud.com/" />
<rule from="^https?://scbackstage\.wpengine\.netdna-cdn\.com/"
to="https://backstage.soundcloud.com/" />
<rule from="^https?://soundcloud\.wpengine\.netdna-cdn\.com/"
to="https://blog.soundcloud.com/" />
</ruleset>

View File

@ -1,36 +0,0 @@
<!--
Nonfunctional:
- image.bayimg.com
- (www.)thepiratebay.sx (http reply)
For problematic rules, see ThePirateBay-mismatches.xml.
-->
<ruleset name="The Pirate Bay (partial)">
<target host="suprbay.org" />
<target host="*.suprbay.org" />
<!-- * for cross-domain cookie -->
<target host="*.forum.suprbay.org" />
<target host="thepiratebay.org"/>
<target host="*.thepiratebay.org"/>
<target host="thepiratebay.se"/>
<target host="*.thepiratebay.se"/>
<securecookie host="^.*\.suprbay\.org$" name=".*" />
<securecookie host="^(.*\.)?thepiratebay\.se$" name=".*"/>
<!-- Cert doesn't match (www.), redirects like so. -->
<rule from="^https?://(?:forum\.|www\.)?suprbay\.org/"
to="https://forum.suprbay.org/" />
<rule from="^http://(?:www\.)?thepiratebay\.(?:org|se)/"
to="https://thepiratebay.se/"/>
<rule from="^http://(rss|static|torrents)\.thepiratebay\.(?:org|se)/"
to="https://$1.thepiratebay.se/"/>
</ruleset>

View File

@ -1,18 +0,0 @@
<ruleset name="Tor Project">
<target host="torproject.org" />
<target host="*.torproject.org" />
<exclusion pattern="^http://torperf\.torproject\.org/" />
<!-- Not secured by server:
-->
<!--securecookie host="^\.blog\.torproject\.org$" name="^SESS[0-9a-f]{32}$" /-->
<securecookie host="^(?:.*\.)?torproject\.org$" name=".+" />
<rule from="^http://([^/:@\.]+\.)?torproject\.org/"
to="https://$1torproject.org/" />
</ruleset>

View File

@ -1,169 +0,0 @@
<!--
Other Twitter rulesets:
- Twitter_Community.com.xml
Nonfunctional domains:
- status.twitter.com *
- status.twitter.jp *
* Tumblr
CDN buckets:
- a1095.g.akamai.net/=/1095/134446/1d/platform.twitter.com/ | platform2.twitter.com.edgesuite.net
- platform2.twitter.com
- twitter-any.s3.amazonaws.com
- twitter-blog.s3.amazonaws.com
- d2rdfnizen5apl.cloudfront.net
- s.twimg.com
- ssl2.twitter.com.edgekey.net
- twitter.github.com
Problematic domains:
- twimg.com subdomains:
- a5 *
- s (cloudfront)
- twitter.com subdomains:
- platform[0-3] (403, akamai)
* akamai
Fully covered domains:
- (www.)t.co (www → ^)
- twimg.com subdomains:
- a[5-9] (→ si0)
- a\d
- abs
- dnt
- ea
- g
- g2
- gu
- hca
- jp
- ma
- ma[0123]
- o
- p
- pbs
- r
- s (→ d2rdfnizen5apl.cloudfront.net)
- si[0-5]
- syndication
- cdn.syndication
- tailfeather
- ton
- v
- widgets
- twitter.com subdomains:
- (www.)
- 201[012]
- about
- ads
- analytics
- api
- cdn.api
- urls.api
- blog
- business
- preview.cdn
- preview-dev.cdn
- preview-stage.cdn
- de
- dev
- en
- engineering
- es
- firefox
- fr
- it
- ja
- jp
- m
- media
- mobile
- music
- oauth
- p
- pic
- platform
- platform[0-3] (→ platform)
- widgets.platform
- search
- static
- support
- transparency
- upload
These altnames don't exist:
- i3.twimg.com
- p-dev.twimg.com
- vmtc.twimg.com
- cdn-dev.api.twitter.com
-->
<ruleset name="Twitter">
<target host="t.co" />
<target host="*.t.co" />
<target host="*.twimg.com" />
<target host="twitter.com" />
<target host="*.twitter.com" />
<!-- Secured by server:
-->
<!--securecookie host="^\.twitter\.com$" name="^_twitter_sess$" /-->
<!--securecookie host="^support\.twitter\.com$" name="^_help_center_session$" /-->
<!--
Not secured by server:
-->
<!--securecookie host="^\.t\.co$" name="^muc$" /-->
<!--securecookie host="^\.twitter\.com$" name="^guest_id$" /-->
<securecookie host="^\.t\.co$" name=".+" />
<securecookie host="^(?:.*\.)?twitter\.com$" name=".+" />
<rule from="^http://(?:www\.)?t\.co/"
to="https://t.co/" />
<rule from="^http://a[5-9]\.twimg\.com/"
to="https://si0.twimg.com/" />
<rule from="^http://(abs|a\d|dnt|ea|g[2u]?|hca|jp|ma\d?|o|p|pbs|r|si\d|(?:cdn\.)?syndication|tailfeather|ton|v|widgets)\.twimg\.com/"
to="https://$1.twimg.com/" />
<rule from="^http://s\.twimg\.com/"
to="https://d2rdfnizen5apl.cloudfront.net/" />
<rule from="^http://((?:201\d|about|ads|analytics|blog|(?:cdn\.|urls\.)?api|business|preview(?:-dev|-stage)?\.cdn|de|dev|engineering|en|es|firefox|fr|it|ja|jp|m|media|mobile|music|oauth|p|pic|platform|widgets\.platform|search|static|support|transparency|upload|www)\.)?twitter\.com/"
to="https://$1twitter.com/" />
<rule from="^http://platform\d\.twitter\.com/"
to="https://platform.twitter.com/" />
</ruleset>

View File

@ -1,75 +0,0 @@
<!--
CDN buckets:
- av.vimeo.com.edgesuite.net
- a808.g.akamai.net
- pdl.vimeocdn.com.edgesuite.net
- a1189.g.akamai.net
Problematic subdomains:
- av (pdl.../crossdomain.xml restricts to port 80)
- pdl (works, akamai)
Partially covered subdomains:
- developer (some pages redirect to http)
- pdl (→ akamai)
Fully covered subdomains:
- (www.)
- secure
Default off per https://trac.torproject.org/projects/tor/ticket/7569 -->
<ruleset name="Vimeo (default off)" default_off="breaks some video embedding">
<target host="vimeo.com" />
<target host="*.vimeo.com" />
<exclusion pattern="^http://av\.vimeo\.com/crossdomain\.xml" />
<!--exclusion pattern="^http://developer\.vimeo\.com/($|\?|(apps|guidelines|help|player)($|[?/]))" /-->
<exclusion pattern="^http://developer\.vimeo\.com/(?!apis(?:$|[?/])|favicon\.ico)" />
<target host="*.vimeocdn.com" />
<!--
Uses crossdomain.xml from s3.amazonaws.com, which sets secure="false"
https://mail1.eff.org/pipermail/https-everywhere/2012-October/001583.html
-->
<exclusion pattern="^http://a\.vimeocdn\.com/p/flash/moogaloop/" />
<!-- We cannot secure streams because crossdomain.xml
restricts to port 80 :(
-->
<exclusion pattern="^http://pdl\.vimeocdn\.com/(?!crossdomain\.xml)" />
<!-- Tracking cookies:
-->
<securecookie host="^\.(?:player\.)?vimeo\.com$" name="^__utm\w$" />
<rule from="^http://((?:developer|player|secure|www)\.)?vimeo\.com/"
to="https://$1vimeo.com/" />
<rule from="^http://av\.vimeo\.com/"
to="https://a248.e.akamai.net/f/808/9207/8m/av.vimeo.com/" />
<!-- a & b: Akamai -->
<rule from="^http://(?:secure-)?([ab])\.vimeocdn\.com/"
to="https://secure-$1.vimeocdn.com/" />
<rule from="^http://i\.vimeocdn\.com/"
to="https://i.vimeocdn.com/" />
<rule from="^http://pdl\.vimeocdn\.com/"
to="https://a248.e.akamai.net/f/1189/4415/8d/pdl.vimeocdn.com/" />
</ruleset>

View File

@ -1,13 +0,0 @@
<ruleset name="WikiLeaks">
<target host="wikileaks.org" />
<target host="*.wikileaks.org" />
<securecookie host="^(?:w*\.)?wikileaks\.org$" name=".+" />
<rule from="^http://((?:chat|search|shop|www)\.)?wikileaks\.org/"
to="https://$1wikileaks.org/" />
</ruleset>

View File

@ -1,107 +0,0 @@
<!--
Wikipedia and other Wikimedia Foundation wikis previously had no real HTTPS support, and
URLs had to be rewritten to https://secure.wikimedia.org/$wikitype/$language/ . This is no
longer the case, see https://blog.wikimedia.org/2011/10/03/native-https-support-enabled-for-all-wikimedia-foundation-wikis/ ,
so this file is a lot simpler these days.
Mixed content:
- Images, on:
- stats.wikimedia.org from upload.wikimedia.org *
- stats.wikimedia.org from wikimediafoundation.org *
* Secured by us
-->
<ruleset name="Wikimedia">
<target host="enwp.org" />
<target host="frwp.org" />
<target host="mediawiki.org" />
<target host="www.mediawiki.org" />
<target host="wikimedia.org" />
<target host="*.wikimedia.org" />
<exclusion pattern="^http://(?:apt|cs|cz|parsoid-lb\.eqiad|status|torrus|ubuntu)\.wikimedia\.org" />
<!-- https://mail1.eff.org/pipermail/https-everywhere-rules/2012-June/001189.html -->
<exclusion pattern="^http://lists\.wikimedia\.org/pipermail(?:$|/)" />
<target host="wikimediafoundation.org" />
<target host="www.wikimediafoundation.org" />
<!-- Wikimedia projects (also some wikimedia.org subdomains) -->
<target host="wikibooks.org" />
<target host="*.wikibooks.org" />
<target host="wikidata.org" />
<target host="*.wikidata.org" />
<target host="wikinews.org" />
<target host="*.wikinews.org" />
<target host="wikipedia.org" />
<target host="*.wikipedia.org" />
<target host="wikiquote.org" />
<target host="*.wikiquote.org" />
<target host="wikisource.org" />
<target host="*.wikisource.org" />
<target host="wikiversity.org" />
<target host="*.wikiversity.org" />
<target host="wikivoyage.org" />
<target host="*.wikivoyage.org" />
<target host="wiktionary.org" />
<target host="*.wiktionary.org" />
<!-- Wikimedia chapters -->
<target host="wikimedia.ca" />
<target host="www.wikimedia.ca" />
<!-- Wikimedia Tool Labs -->
<target host="tools.wmflabs.org" />
<target host="icinga.wmflabs.org" />
<target host="ganglia.wmflabs.org" />
<!-- Not secured by server:
-->
<!--securecookie host="^\.wiki(books|ipedia)\.org$" name="^GeoIP$" /-->
<securecookie host="^^\.wik(?:ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name="^GeoIP$" />
<securecookie host="^([^@:/]+\.)?wik(ibooks|idata|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name=".*" />
<securecookie host="^(species|commons|meta|incubator|wikitech).wikimedia.org$" name=".*" />
<securecookie host="^(?:www\.)?mediawiki\.org$" name=".*" />
<securecookie host="^wikimediafoundation.org$" name=".*" />
<rule from="^http://(en|fr)wp\.org/"
to="https://$1.wikipedia.org/wiki/" />
<rule from="^http://(?:www\.)?mediawiki\.org/"
to="https://www.mediawiki.org/" />
<rule from="^https?://download\.wikipedia\.org/"
to="https://dumps.wikimedia.org/" />
<rule from="^https?://(download|dataset2|sitemap)\.wikimedia\.org/"
to="https://dumps.wikimedia.org/" />
<rule from="^https?://(labs-ns[01]|virt0)\.wikimedia\.org/"
to="https://wikitech.wikimedia.org/" />
<rule from="^https?://noboard\.chapters\.wikimedia\.org/"
to="https://noboard-chapters.wikimedia.org/" />
<rule from="^https?://wg\.en\.wikipedia\.org/"
to="https://wg-en.wikipedia.org/" />
<rule from="^https?://arbcom\.(de|en|fi|nl)\.wikipedia\.org/"
to="https://arbcom-$1.wikipedia.org/" />
<rule from="^http://([^@:/]+\.)?wik(ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org/"
to="https://$1wik$2.org/" />
<rule from="^http://(www\.)?wikimediafoundation\.org/"
to="https://$1wikimediafoundation.org/" />
<rule from="^http://(www\.)?wikimedia\.ca/"
to="https://wikimedia.ca/" />
<rule from="^http://([^@:/]+)\.wmflabs\.org/"
to="https://$1.wmflabs.org/" />
</ruleset>

File diff suppressed because it is too large

View File

@ -1,46 +0,0 @@
<ruleset name="YouTube (partial)">
<target host="youtube.com" />
<target host="*.youtube.com" />
<exclusion pattern="^http://(?:www\.)?youtube\.com/crossdomain\.xml"/>
<exclusion pattern="^http://(?:www\.)?youtube\.com/(?:apiplayer|api_video_info)"/>
<exclusion pattern="^http://(?:[^/@:\.]+\.)?ytimg\.com/.*apiplayer[0-9]*\.swf"/>
<target host="*.ytimg.com" />
<target host="youtu.be" />
<target host="youtube-nocookie.com"/>
<target host="www.youtube-nocookie.com"/>
<target host="*.googlevideo.com"/>
<exclusion pattern="^http://([^/@:\.]+)\.googlevideo\.com/crossdomain\.xml"/>
<!-- Not secured by server:
-->
<!--securecookie host="^\.youtube\.com$" name="^(GEUP|PREF|VISITOR_INFO1_LIVE|YSC)$" /-->
<!-- observed ^. cookies:
- use_hitbox
- VISITOR_INFO1_LIVE
- recently_watched_video_id_list
- .youtube.com -->
<securecookie host="^\.youtube\.com" name=".*"/>
<rule from="^http://(www\.)?youtube\.com/"
to="https://$1youtube.com/"/>
<rule from="^http://(br|de|es|fr|il|img|insight|jp|m|nl|uk)\.youtube\.com/"
to="https://$1.youtube.com/"/>
<rule from="^http://([^/@:\.]+)\.ytimg\.com/"
to="https://$1.ytimg.com/"/>
<rule from="^http://youtu\.be/"
to="https://youtu.be/"/>
<rule from="^http://(?:www\.)?youtube-nocookie\.com/"
to="https://www.youtube-nocookie.com/"/>
<rule from="^http://([^/@:\.]+)\.googlevideo\.com/"
to="https://$1.googlevideo.com/"/>
</ruleset>

View File

@ -1,77 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
# list of language codes
language_codes = (
("ar_XA", "Arabic", "Arabia"),
("bg_BG", "Bulgarian", "Bulgaria"),
("cs_CZ", "Czech", "Czech Republic"),
("de_DE", "German", "Germany"),
("da_DK", "Danish", "Denmark"),
("de_AT", "German", "Austria"),
("de_CH", "German", "Switzerland"),
("el_GR", "Greek", "Greece"),
("en_AU", "English", "Australia"),
("en_CA", "English", "Canada"),
("en_GB", "English", "United Kingdom"),
("en_ID", "English", "Indonesia"),
("en_IE", "English", "Ireland"),
("en_IN", "English", "India"),
("en_MY", "English", "Malaysia"),
("en_NZ", "English", "New Zealand"),
("en_PH", "English", "Philippines"),
("en_SG", "English", "Singapore"),
("en_US", "English", "United States"),
("en_XA", "English", "Arabia"),
("en_ZA", "English", "South Africa"),
("es_AR", "Spanish", "Argentina"),
("es_CL", "Spanish", "Chile"),
("es_ES", "Spanish", "Spain"),
("es_MX", "Spanish", "Mexico"),
("es_US", "Spanish", "United States"),
("es_XL", "Spanish", "Latin America"),
("et_EE", "Estonian", "Estonia"),
("fi_FI", "Finnish", "Finland"),
("fr_BE", "French", "Belgium"),
("fr_CA", "French", "Canada"),
("fr_CH", "French", "Switzerland"),
("fr_FR", "French", "France"),
("he_IL", "Hebrew", "Israel"),
("hr_HR", "Croatian", "Croatia"),
("hu_HU", "Hungarian", "Hungary"),
("it_IT", "Italian", "Italy"),
("ja_JP", "Japanese", "Japan"),
("ko_KR", "Korean", "Korea"),
("lt_LT", "Lithuanian", "Lithuania"),
("lv_LV", "Latvian", "Latvia"),
("nb_NO", "Norwegian", "Norway"),
("nl_BE", "Dutch", "Belgium"),
("nl_NL", "Dutch", "Netherlands"),
("pl_PL", "Polish", "Poland"),
("pt_BR", "Portuguese", "Brazil"),
("pt_PT", "Portuguese", "Portugal"),
("ro_RO", "Romanian", "Romania"),
("ru_RU", "Russian", "Russia"),
("sk_SK", "Slovak", "Slovak Republic"),
("sl_SL", "Slovenian", "Slovenia"),
("sv_SE", "Swedish", "Sweden"),
("th_TH", "Thai", "Thailand"),
("tr_TR", "Turkish", "Turkey"),
("uk_UA", "Ukrainian", "Ukraine"),
("zh_CN", "Chinese", "China"),
("zh_HK", "Chinese", "Hong Kong SAR"),
("zh_TW", "Chinese", "Taiwan"))

View File

@ -1,61 +0,0 @@
import requests
the_http_adapter = requests.adapters.HTTPAdapter(pool_connections=100)
the_https_adapter = requests.adapters.HTTPAdapter(pool_connections=100)
class SessionSinglePool(requests.Session):
def __init__(self):
global the_https_adapter, the_http_adapter
super(SessionSinglePool, self).__init__()
# reuse the same adapters
self.adapters.clear()
self.mount('https://', the_https_adapter)
self.mount('http://', the_http_adapter)
def close(self):
"""Call super, but clear adapters since there are managed globaly"""
self.adapters.clear()
super(SessionSinglePool, self).close()
def request(method, url, **kwargs):
"""same as requests/requests/api.py request(...) except it use SessionSinglePool"""
session = SessionSinglePool()
response = session.request(method=method, url=url, **kwargs)
session.close()
return response
def get(url, **kwargs):
kwargs.setdefault('allow_redirects', True)
return request('get', url, **kwargs)
def options(url, **kwargs):
kwargs.setdefault('allow_redirects', True)
return request('options', url, **kwargs)
def head(url, **kwargs):
kwargs.setdefault('allow_redirects', False)
return request('head', url, **kwargs)
def post(url, data=None, **kwargs):
return request('post', url, data=data, **kwargs)
def put(url, data=None, **kwargs):
return request('put', url, data=data, **kwargs)
def patch(url, data=None, **kwargs):
return request('patch', url, data=data, **kwargs)
def delete(url, **kwargs):
return request('delete', url, **kwargs)
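The module above shares a single HTTP and a single HTTPS adapter across all sessions, so every outgoing engine request reuses the same connection pools instead of opening a new pool per search. The wrappers mirror the requests API; an illustrative usage, where the URL, parameters and timeout are arbitrary example values rather than searx defaults:

# each call builds a throw-away SessionSinglePool, but the underlying
# HTTPAdapter connection pools persist between calls
response = get('https://duckduckgo.com/html/', params={'q': 'searx'}, timeout=5.0)
print(response.status_code)  # e.g. 200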

View File

@ -1,132 +0,0 @@
#!/usr/bin/env python
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
'''
from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
import string
import re
class Query(object):
"""parse query"""
def __init__(self, query, blocked_engines):
self.query = query
self.blocked_engines = []
if blocked_engines:
self.blocked_engines = blocked_engines
self.query_parts = []
self.engines = []
self.languages = []
self.specific = False
# parse query, if tags are set, which
    # change the search engine or search-language
def parse_query(self):
self.query_parts = []
# split query, including whitespaces
raw_query_parts = re.split(r'(\s+)', self.query)
parse_next = True
for query_part in raw_query_parts:
if not parse_next:
self.query_parts[-1] += query_part
continue
parse_next = False
            # part only contains spaces, skip
if query_part.isspace()\
or query_part == '':
parse_next = True
self.query_parts.append(query_part)
continue
            # this forces a language
if query_part[0] == ':':
lang = query_part[1:].lower()
# check if any language-code is equal with
# declared language-codes
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
# if correct language-code is found
# set it as new search-language
if lang == lang_id\
or lang_id.startswith(lang)\
or lang == lang_name\
or lang.replace('_', ' ') == country:
parse_next = True
self.languages.append(lang)
break
            # this forces an engine or category
if query_part[0] == '!' or query_part[0] == '?':
prefix = query_part[1:].replace('_', ' ')
# check if prefix is equal with engine shortcut
if prefix in engine_shortcuts:
parse_next = True
self.engines.append({'category': 'none',
'name': engine_shortcuts[prefix]})
# check if prefix is equal with engine name
elif prefix in engines:
parse_next = True
self.engines.append({'category': 'none',
'name': prefix})
            # check if prefix is equal with category name
elif prefix in categories:
# using all engines for that search, which
                # are declared under that category name
parse_next = True
self.engines.extend({'category': prefix,
'name': engine.name}
for engine in categories[prefix]
if (engine.name, prefix) not in self.blocked_engines)
if query_part[0] == '!':
self.specific = True
# append query part to query_part list
self.query_parts.append(query_part)
def changeSearchQuery(self, search_query):
if len(self.query_parts):
self.query_parts[-1] = search_query
else:
self.query_parts.append(search_query)
def getSearchQuery(self):
if len(self.query_parts):
return self.query_parts[-1]
else:
return ''
def getFullQuery(self):
        # get full query including whitespaces
return string.join(self.query_parts, '')
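
A rough usage sketch of the parser above; it assumes a searx checkout whose settings.yml maps the 'ddg' shortcut to duckduckgo, and the printed outputs are indicative only.

# Illustrative only; shortcut names ('ddg') come from settings.yml at runtime.
from searx.query import Query

query = Query('!ddg :fr paris metro', blocked_engines=[])
query.parse_query()

print(query.getSearchQuery())   # 'paris metro' (the non-tag part of the query)
print(query.languages)          # ['fr']
print(query.engines)            # e.g. [{'category': 'none', 'name': 'duckduckgo'}]
print(query.specific)           # True, because a '!' bang was used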


@ -1,556 +0,0 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import threading
import re
import searx.poolrequests as requests_lib
from itertools import izip_longest, chain
from operator import itemgetter
from Queue import Queue
from time import time
from urlparse import urlparse, unquote
from searx.engines import (
categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent, get_blocked_engines
from searx.query import Query
from searx import logger
logger = logger.getChild('search')
number_of_searches = 0
def search_request_wrapper(fn, url, engine_name, **kwargs):
try:
return fn(url, **kwargs)
except:
# increase errors stats
engines[engine_name].stats['errors'] += 1
# print engine name and specific error message
logger.exception('engine crash: {0}'.format(engine_name))
return
def threaded_requests(requests):
timeout_limit = max(r[2]['timeout'] for r in requests)
search_start = time()
for fn, url, request_args, engine_name in requests:
request_args['timeout'] = timeout_limit
th = threading.Thread(
target=search_request_wrapper,
args=(fn, url, engine_name),
kwargs=request_args,
name='search_request',
)
th._engine_name = engine_name
th.start()
for th in threading.enumerate():
if th.name == 'search_request':
remaining_time = max(0.0, timeout_limit - (time() - search_start))
th.join(remaining_time)
if th.isAlive():
logger.warning('engine timeout: {0}'.format(th._engine_name))
# get default request parameters
def default_request_params():
return {
'method': 'GET',
'headers': {},
'data': {},
'url': '',
'cookies': {},
'verify': True
}
# create a callback wrapper for the search engine results
def make_callback(engine_name, results_queue, callback, params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
# check if redirect comparing to the True value,
        # because resp can be a Mock object, and any attribute name returns something.
if response.is_redirect is True:
logger.debug('{0} redirect on: {1}'.format(engine_name, response))
return
response.search_params = params
timeout_overhead = 0.2 # seconds
search_duration = time() - params['started']
timeout_limit = engines[engine_name].timeout + timeout_overhead
if search_duration > timeout_limit:
engines[engine_name].stats['page_load_time'] += timeout_limit
engines[engine_name].stats['errors'] += 1
return
# callback
search_results = callback(response)
# add results
for result in search_results:
result['engine'] = engine_name
results_queue.put_nowait((engine_name, search_results))
# update stats with current page-load-time
engines[engine_name].stats['page_load_time'] += search_duration
return process_callback
# return the meaningful length of the content for a result
def content_result_len(content):
if isinstance(content, basestring):
content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
return len(content)
else:
return 0
# score results and remove duplications
def score_results(results):
# calculate scoring parameters
flat_res = filter(
None, chain.from_iterable(izip_longest(*results.values())))
flat_len = len(flat_res)
engines_len = len(results)
results = []
# pass 1: deduplication + scoring
for i, res in enumerate(flat_res):
res['parsed_url'] = urlparse(res['url'])
res['host'] = res['parsed_url'].netloc
if res['host'].startswith('www.'):
res['host'] = res['host'].replace('www.', '', 1)
res['engines'] = [res['engine']]
weight = 1.0
        # strip multiple spaces and carriage returns from content
if res.get('content'):
res['content'] = re.sub(' +', ' ',
res['content'].strip().replace('\n', ''))
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
weight = float(engines[res['engine']].weight)
# calculate score for that engine
score = int((flat_len - i) / engines_len) * weight + 1
# check for duplicates
duplicated = False
for new_res in results:
# remove / from the end of the url if required
p1 = res['parsed_url'].path[:-1]\
if res['parsed_url'].path.endswith('/')\
else res['parsed_url'].path
p2 = new_res['parsed_url'].path[:-1]\
if new_res['parsed_url'].path.endswith('/')\
else new_res['parsed_url'].path
# check if that result is a duplicate
if res['host'] == new_res['host'] and\
unquote(p1) == unquote(p2) and\
res['parsed_url'].query == new_res['parsed_url'].query and\
res.get('template') == new_res.get('template'):
duplicated = new_res
break
# merge duplicates together
if duplicated:
# using content with more text
if content_result_len(res.get('content', '')) >\
content_result_len(duplicated.get('content', '')):
duplicated['content'] = res['content']
# increase result-score
duplicated['score'] += score
# add engine to list of result-engines
duplicated['engines'].append(res['engine'])
# using https if possible
if duplicated['parsed_url'].scheme == 'https':
continue
elif res['parsed_url'].scheme == 'https':
duplicated['url'] = res['parsed_url'].geturl()
duplicated['parsed_url'] = res['parsed_url']
# if there is no duplicate found, append result
else:
res['score'] = score
results.append(res)
results = sorted(results, key=itemgetter('score'), reverse=True)
# pass 2 : group results by category and template
gresults = []
categoryPositions = {}
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
category = engines[res['engine']].categories[0] + ':' + ''\
if 'template' not in res\
else res['template']
current = None if category not in categoryPositions\
else categoryPositions[category]
# group with previous results using the same category
# if the group can accept more result and is not too far
# from the current position
if current is not None and (current['count'] > 0)\
and (len(gresults) - current['index'] < 20):
# group with the previous results using
# the same category with this one
index = current['index']
gresults.insert(index, res)
# update every index after the current one
# (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
categoryPositions[k]['index'] = v+1
# update this category
current['count'] -= 1
else:
# same category
gresults.append(res)
# update categoryIndex
categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
attributeSet = set()
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
if content1 is not None:
if content_result_len(content2) > content_result_len(content1):
infobox1['content'] = content2
else:
infobox1.set('content', content2)
def merge_infoboxes(infoboxes):
results = []
infoboxes_id = {}
for infobox in infoboxes:
add_infobox = True
infobox_id = infobox.get('id', None)
if infobox_id is not None:
existingIndex = infoboxes_id.get(infobox_id, None)
if existingIndex is not None:
merge_two_infoboxes(results[existingIndex], infobox)
add_infobox = False
if add_infobox:
results.append(infobox)
infoboxes_id[infobox_id] = len(results)-1
return results
class Search(object):
"""Search information container"""
def __init__(self, request):
# init vars
super(Search, self).__init__()
self.query = None
self.engines = []
self.categories = []
self.paging = False
self.pageno = 1
self.lang = 'all'
# set blocked engines
self.blocked_engines = get_blocked_engines(engines, request.cookies)
self.results = []
self.suggestions = []
self.answers = []
self.infoboxes = []
self.request_data = {}
# set specific language if set
if request.cookies.get('language')\
and request.cookies['language'] in (x[0] for x in language_codes):
self.lang = request.cookies['language']
# set request method
if request.method == 'POST':
self.request_data = request.form
else:
self.request_data = request.args
# TODO better exceptions
if not self.request_data.get('q'):
raise Exception('noquery')
# set pagenumber
pageno_param = self.request_data.get('pageno', '1')
if not pageno_param.isdigit() or int(pageno_param) < 1:
raise Exception('wrong pagenumber')
self.pageno = int(pageno_param)
# parse query, if tags are set, which change
        # the search engine or search-language
query_obj = Query(self.request_data['q'], self.blocked_engines)
query_obj.parse_query()
# set query
self.query = query_obj.getSearchQuery()
# get last selected language in query, if possible
        # TODO support search with multiple languages
if len(query_obj.languages):
self.lang = query_obj.languages[-1]
self.engines = query_obj.engines
self.categories = []
# if engines are calculated from query,
# set categories by using that informations
if self.engines and query_obj.specific:
self.categories = list(set(engine['category']
for engine in self.engines))
# otherwise, using defined categories to
# calculate which engines should be used
else:
# set used categories
for pd_name, pd in self.request_data.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
# if category is not found in list, skip
if category not in categories:
continue
if pd != 'off':
# add category to list
self.categories.append(category)
elif category in self.categories:
# remove category from list if property is set to 'off'
self.categories.remove(category)
# if no category is specified for this search,
            # use the user-defined default configuration
            # (which is stored in a cookie)
if not self.categories:
cookie_categories = request.cookies.get('categories', '')
cookie_categories = cookie_categories.split(',')
for ccateg in cookie_categories:
if ccateg in categories:
self.categories.append(ccateg)
# if still no category is specified, using general
# as default-category
if not self.categories:
self.categories = ['general']
# using all engines for that search, which are
# declared under the specific categories
for categ in self.categories:
self.engines.extend({'category': categ,
'name': engine.name}
for engine in categories[categ]
if (engine.name, categ) not in self.blocked_engines)
# do search-request
def search(self, request):
global number_of_searches
# init vars
requests = []
results_queue = Queue()
results = {}
suggestions = set()
answers = set()
infoboxes = []
# increase number of searches
number_of_searches += 1
# set default useragent
# user_agent = request.headers.get('User-Agent', '')
user_agent = gen_useragent()
        # start search-request for all selected engines
for selected_engine in self.engines:
if selected_engine['name'] not in engines:
continue
engine = engines[selected_engine['name']]
# if paging is not supported, skip
if self.pageno > 1 and not engine.paging:
continue
# if search-language is set and engine does not
# provide language-support, skip
if self.lang != 'all' and not engine.language_support:
continue
# set default request parameters
request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent
request_params['category'] = selected_engine['category']
request_params['started'] = time()
request_params['pageno'] = self.pageno
request_params['language'] = self.lang
try:
# 0 = None, 1 = Moderate, 2 = Strict
request_params['safesearch'] = int(request.cookies.get('safesearch', 1))
except ValueError:
request_params['safesearch'] = 1
# update request parameters dependent on
# search-engine (contained in engines folder)
engine.request(self.query.encode('utf-8'), request_params)
if request_params['url'] is None:
# TODO add support of offline engines
pass
# create a callback wrapper for the search engine results
callback = make_callback(
selected_engine['name'],
results_queue,
engine.response,
request_params)
            # create dictionary which contains all
            # information about the request
request_args = dict(
headers=request_params['headers'],
hooks=dict(response=callback),
cookies=request_params['cookies'],
timeout=engine.timeout,
verify=request_params['verify']
)
# specific type of request (GET or POST)
if request_params['method'] == 'GET':
req = requests_lib.get
else:
req = requests_lib.post
request_args['data'] = request_params['data']
# ignoring empty urls
if not request_params['url']:
continue
# append request to list
requests.append((req, request_params['url'],
request_args,
selected_engine['name']))
if not requests:
return results, suggestions, answers, infoboxes
# send all search-request
threaded_requests(requests)
while not results_queue.empty():
engine_name, engine_results = results_queue.get_nowait()
# TODO type checks
[suggestions.add(x['suggestion'])
for x in list(engine_results)
if 'suggestion' in x
and engine_results.remove(x) is None]
[answers.add(x['answer'])
for x in list(engine_results)
if 'answer' in x
and engine_results.remove(x) is None]
infoboxes.extend(x for x in list(engine_results)
if 'infobox' in x
and engine_results.remove(x) is None)
results[engine_name] = engine_results
# update engine-specific stats
for engine_name, engine_results in results.items():
engines[engine_name].stats['search_count'] += 1
engines[engine_name].stats['result_count'] += len(engine_results)
# score results and remove duplications
results = score_results(results)
# merge infoboxes according to their ids
infoboxes = merge_infoboxes(infoboxes)
# update engine stats, using calculated score
for result in results:
for res_engine in result['engines']:
engines[result['engine']]\
.stats['score_count'] += result['score']
# return results, suggestions, answers and infoboxes
return results, suggestions, answers, infoboxes
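
The ranking in pass 1 of score_results() above reduces to a single position-based formula. A worked sketch with made-up numbers:

# Worked example of: score = int((flat_len - i) / engines_len) * weight + 1
flat_len = 30        # e.g. 3 engines x 10 results, interleaved
engines_len = 3      # number of engines that returned something
weight = 1.0         # per-engine weight, defaults to 1.0

for i in (0, 1, 14, 29):
    score = int((flat_len - i) / engines_len) * weight + 1
    print('position %2d -> score %.1f' % (i, score))
# position  0 -> score 11.0
# position  1 -> score 10.0
# position 14 -> score 6.0
# position 29 -> score 1.0
# Duplicates found later simply add their score to the first occurrence.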


@ -1,57 +0,0 @@
## 500px (Images)
#
# @website https://500px.com
# @provide-api yes (https://developers.500px.com/)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, thumbnail, img_src, content
#
# @todo rewrite to api
from urllib import urlencode
from urlparse import urljoin
from lxml import html
# engine dependent config
categories = ['images']
paging = True
# search-url
base_url = 'https://500px.com'
search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
# do search-request
def request(query, params):
params['url'] = search_url.format(pageno=params['pageno'],
query=urlencode({'q': query}))
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath('//div[@class="photo"]'):
link = result.xpath('.//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
title = result.xpath('.//div[@class="title"]//text()')[0]
img_src = link.xpath('.//img')[0].attrib['src']
content = result.xpath('.//div[@class="info"]//text()')[0]
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'content': content,
'template': 'images.html'})
# return results
return results
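
For the engine above, request() only fills in the URL template. A small sketch of what it produces for page 2; the query text is illustrative and the doubled 'search?search?' is taken verbatim from the engine.

# Python 2, matching the engine code above.
from urllib import urlencode

base_url = 'https://500px.com'
search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'

url = search_url.format(pageno=2, query=urlencode({'q': 'mountain lake'}))
print(url)
# https://500px.com/search?search?page=2&type=photos&q=mountain+lake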


@ -21,12 +21,18 @@ import re
 # engine dependent config
 categories = ['images']
 paging = True
+safesearch = True
 
 # search-url
 base_url = 'https://www.bing.com/'
 search_string = 'images/search?{query}&count=10&first={offset}'
 thumb_url = "http://ts1.mm.bing.net/th?id={ihk}"
 
+# safesearch definitions
+safesearch_types = {2: 'STRICT',
+                    1: 'DEMOTE',
+                    0: 'OFF'}
+
 
 # do search-request
 def request(query, params):
@ -43,7 +49,8 @@ def request(query, params):
                                       offset=offset)
 
-    params['cookies']['SRCHHPGUSR'] = \
-        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
+    params['cookies']['SRCHHPGUSR'] = \
+        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
+        '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 
     params['url'] = base_url + search_path
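
The new ADLT flag is easiest to see by expanding the cookie string for each safesearch level. A small sketch; the language value is illustrative.

# Expanded SRCHHPGUSR cookie for each level (0 = None, 1 = Moderate, 2 = Strict);
# 'en-US' stands in for the language searx would pass along.
safesearch_types = {2: 'STRICT',
                    1: 'DEMOTE',
                    0: 'OFF'}
language = 'en-US'

for level in (0, 1, 2):
    cookie = 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
        '&ADLT=' + safesearch_types.get(level, 'DEMOTE')
    print(cookie)
# NEWWND=0&NRSLT=-1&SRCHLANG=en&ADLT=OFF
# NEWWND=0&NRSLT=-1&SRCHLANG=en&ADLT=DEMOTE
# NEWWND=0&NRSLT=-1&SRCHLANG=en&ADLT=STRICT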


@ -13,12 +13,9 @@ def request(query, params):
     if not m:
         # wrong query
         return params
 
-    try:
-        ammount, from_currency, to_currency = m.groups()
-        ammount = float(ammount)
-    except:
-        # wrong params
-        return params
+    ammount, from_currency, to_currency = m.groups()
+    ammount = float(ammount)
 
     q = (from_currency + to_currency).upper()
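
The dropped try/except relies on the match object only existing when the groups are well-formed. The pattern below is a simplified stand-in for the engine's parser_re, which is not part of this hunk.

import re

# Simplified stand-in for parser_re; the real pattern lives outside this hunk.
parser_re = re.compile(r'^(\d+(?:\.\d+)?) ([a-z]{3}) in ([a-z]{3})$', re.I)

m = parser_re.match('100 usd in eur')
ammount, from_currency, to_currency = m.groups()   # spelling as in the engine
ammount = float(ammount)                           # cannot fail: group 1 is numeric
q = (from_currency + to_currency).upper()
print('%s %s' % (ammount, q))                      # 100.0 USDEUR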


@ -15,7 +15,7 @@
 from urllib import urlencode
 from lxml.html import fromstring
-from searx.utils import html_to_text
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
 
 # specific xpath variables
 result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]//text()'
-content_xpath = './/div[@class="snippet"]//text()'
+title_xpath = './/a[@class="large"]'
+content_xpath = './/div[@class="snippet"]'
 
 
 # do search-request
@ -64,8 +64,8 @@ def response(resp):
         if not res_url:
             continue
 
-        title = html_to_text(''.join(r.xpath(title_xpath)))
-        content = html_to_text(''.join(r.xpath(content_xpath)))
+        title = extract_text(r.xpath(title_xpath))
+        content = extract_text(r.xpath(content_xpath))
 
         # append result
         results.append({'title': title,
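
The move from html_to_text to extract_text shifts extraction from joined text nodes to whole elements. A rough illustration with plain lxml follows; the searx helpers themselves are only paraphrased here, not copied.

from lxml import html

r = html.fromstring('<div class="snippet">Result <b>snippet</b> text</div>')

# old style: select text nodes with '...//text()' and join them into a string
old_text = ''.join(r.xpath('.//text()'))

# new style: select the element itself and take its text content, which is
# roughly what extract_text() does for an element result
new_text = r.text_content()

print(old_text)   # Result snippet text
print(new_text)   # Result snippet text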


@ -25,9 +25,10 @@ def request(query, params):
 def response(resp):
-    search_res = json.loads(resp.text)
     results = []
 
+    search_res = json.loads(resp.text)
+
     content = ''
     heading = search_res.get('Heading', '')
     attributes = []
@ -68,7 +69,7 @@ def response(resp):
         results.append({'title': heading, 'url': firstURL})
 
     # related topics
-    for ddg_result in search_res.get('RelatedTopics', None):
+    for ddg_result in search_res.get('RelatedTopics', []):
         if 'FirstURL' in ddg_result:
             suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                         ddg_result.get('Text', None),
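
The changed default for 'RelatedTopics' matters when the key is missing from the DuckDuckGo response. A minimal reproduction:

search_res = {'Heading': 'Example'}              # no 'RelatedTopics' key at all

for ddg_result in search_res.get('RelatedTopics', []):
    pass                                         # new default: loop simply skips

try:
    for ddg_result in search_res.get('RelatedTopics', None):
        pass
except TypeError as exc:
    print(exc)                                   # 'NoneType' object is not iterable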


@ -37,7 +37,7 @@ search_category = {'general': 'web',
 # do search-request
 def request(query, params):
-    offset = (params['pageno']-1) * number_of_results + 1
+    offset = (params['pageno'] - 1) * number_of_results + 1
     categorie = search_category.get(params['category'], 'web')
 
     if params['language'] == 'all':
@ -45,11 +45,11 @@ def request(query, params):
     else:
         language = params['language'].split('_')[0]
 
-    # skip, if language is not supported
+    # if language is not supported, put it in english
     if language != 'en' and\
        language != 'de' and\
        language != 'zh':
-        return params
+        language = 'en'
 
     params['url'] = search_url.format(offset=offset,
                                       number_of_results=number_of_results,
@ -69,12 +69,10 @@ def response(resp):
     # HTTP-Code 401: api-key is not valide
     if resp.status_code == 401:
         raise Exception("API key is not valide")
-        return []
 
     # HTTP-Code 429: rate limit exceeded
     if resp.status_code == 429:
         raise Exception("rate limit has been exceeded!")
-        return []
 
     results = []
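
The language handling now falls back instead of aborting the request. A small sketch of the new behaviour; the pick_language helper is illustrative, and the 'all' branch is assumed to default to English as in the unchanged part of the engine.

def pick_language(param_language):
    # 'all' handling is outside this hunk; English is assumed as its default.
    if param_language == 'all':
        return 'en'
    language = param_language.split('_')[0]
    if language not in ('en', 'de', 'zh'):
        language = 'en'        # new behaviour: fall back instead of skipping
    return language

print(pick_language('fr_FR'))  # en (the old code returned params unchanged here)
print(pick_language('de_DE'))  # de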


@ -1,95 +0,0 @@
#!/usr/bin/env python
# Flickr (Images)
#
# @website https://www.flickr.com
# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
#
# @using-api no
# @results HTML
# @stable no
# @parse url, title, thumbnail, img_src
from urllib import urlencode
from json import loads
import re
categories = ['images']
url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
paging = True
def build_flickr_url(user_id, photo_id):
return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params):
params['url'] = search_url.format(query=urlencode({'text': query}),
page=params['pageno'])
return params
def response(resp):
results = []
matches = regex.search(resp.text)
if matches is None:
return results
match = matches.group(1)
search_results = loads(match)
if '_data' not in search_results:
return []
photos = search_results['_data']
for photo in photos:
# In paged configuration, the first pages' photos
# are represented by a None object
if photo is None:
continue
img_src = None
# From the biggest to the lowest format
for image_size in image_sizes:
if image_size in photo['sizes']:
img_src = photo['sizes'][image_size]['displayUrl']
break
if not img_src:
continue
if 'id' not in photo['owner']:
continue
url = build_flickr_url(photo['owner']['id'], photo['id'])
title = photo.get('title', '')
content = '<span class="photo-author">' +\
photo['owner']['username'] +\
'</span><br />'
if 'description' in photo:
content = content +\
'<span class="description">' +\
photo['description'] +\
'</span>'
# append result
results.append({'url': url,
'title': title,
'img_src': img_src,
'content': content,
'template': 'images.html'})
return results
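
The size selection above always prefers the largest available format. A minimal sketch with a made-up photo record:

image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')

# Made-up photo record; the real one comes from Flickr's embedded JSON.
photo = {'sizes': {'z': {'displayUrl': 'https://example.org/z.jpg'},
                   'q': {'displayUrl': 'https://example.org/q.jpg'}}}

img_src = None
for image_size in image_sizes:        # ordered from biggest to smallest
    if image_size in photo['sizes']:
        img_src = photo['sizes'][image_size]['displayUrl']
        break

print(img_src)                        # https://example.org/z.jpg ('z' beats 'q')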
