add support for non-root url
parent 5a0f8811d5
commit fc207de1ab
@@ -0,0 +1,156 @@
server:
    port : 8888
    secret_key : "ultrasecretkey" # change this!
    debug : True
    request_timeout : 2.0 # seconds
    base_url : True

engines:
  - name : wikipedia
    engine : wikipedia
    number_of_results : 1
    paging : False
    shortcut : wp

  - name : bing
    engine : bing
    locale : en-US
    shortcut : bi

  - name : bing news
    engine : bing_news
    locale : en-US
    shortcut : bin

  - name : currency
    engine : currency_convert
    categories : general
    shortcut : cc

  - name : deviantart
    engine : deviantart
    categories : images
    shortcut : da
    timeout: 3.0

  - name : ddg definitions
    engine : duckduckgo_definitions
    shortcut : ddd

  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
    shortcut : ddg

  - name : filecrop
    engine : filecrop
    categories : files
    shortcut : fc

  - name : flickr
    engine : flickr
    categories : images
    shortcut : fl
    timeout: 3.0

  - name : github
    engine : github
    categories : it
    shortcut : gh

  - name : google
    engine : google
    shortcut : go

  - name : google images
    engine : google_images
    shortcut : goi

  - name : google news
    engine : google_news
    shortcut : gon

  - name : piratebay
    engine : piratebay
    categories : videos, music, files
    shortcut : tpb

  - name : soundcloud
    engine : soundcloud
    categories : music
    shortcut : sc

  - name : stackoverflow
    engine : stackoverflow
    categories : it
    shortcut : st

  - name : startpage
    engine : startpage
    base_url : 'https://startpage.com/'
    search_url : 'https://startpage.com/do/search'
    shortcut : sp

# +30% page load time
#  - name : ixquick
#    engine : startpage
#    base_url : 'https://www.ixquick.com/'
#    search_url : 'https://www.ixquick.com/do/search'

  - name : twitter
    engine : twitter
    categories : social media
    shortcut : tw

# maybe in a fun category
#  - name : uncyclopedia
#    engine : mediawiki
#    categories : general
#    shortcut : unc
#    url : https://uncyclopedia.wikia.com/

# tmp suspended - too slow, too many errors
#  - name : urbandictionary
#    engine : xpath
#    search_url : http://www.urbandictionary.com/define.php?term={query}
#    url_xpath : //div[@class="word"]//a/@href
#    title_xpath : //div[@class="word"]//a
#    content_xpath : //div[@class="definition"]
#    shortcut : ud

  - name : yahoo
    engine : yahoo
    shortcut : yh

  - name : yahoo news
    engine : yahoo_news
    shortcut : yhn

  - name : youtube
    engine : youtube
    categories : videos
    shortcut : yt

  - name : dailymotion
    engine : dailymotion
    locale : en_US
    categories : videos
    shortcut : dm

  - name : vimeo
    engine : vimeo
    categories : videos
    results_xpath : //div[@id="browse_content"]/ol/li
    url_xpath : ./a/@href
    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
    content_xpath : ./a/img/@src
    shortcut : vm

locales:
    en : English
    de : Deutsch
    hu : Magyar
    fr : Français
    es : Español
    it : Italiano
    nl : Nederlands
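A minimal sketch of loading and inspecting the new settings file (assuming PyYAML is available, and using the conf/settings.yml-noroot path that the install script below copies; the snippet is illustrative, not part of the commit):

import yaml  # PyYAML, assumed available

# Load the noroot settings and read back the values defined above.
with open('conf/settings.yml-noroot') as settings_file:
    settings = yaml.safe_load(settings_file)

print(settings['server']['base_url'])                # True
print([e['shortcut'] for e in settings['engines']])  # ['wp', 'bi', 'bin', ...]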
@@ -26,7 +26,16 @@
             },
             "example": "/searx",
             "default": "/searx"
+        },
+        {
+            "name": "public_site",
+            "ask": {
+                "en": "Is it a public Searx ?"
+            },
+            "choices": ["Yes", "No"],
+            "default": "Yes"
         }
     ]
     }
 }
@@ -41,7 +41,12 @@ then
 fi

 #Configuration Searx
-sudo cp ../conf/settings.yml /opt/searx/searx/
+if [ $path != "/" ];
+then
+    sudo cp ../conf/settings.yml-noroot /opt/searx/searx/settings.yml
+else
+    sudo cp ../conf/settings.yml /opt/searx/searx/
+fi
 sudo sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" /opt/searx/searx/settings.yml

 # Set permissions to searx directory
@@ -62,13 +67,15 @@ else
 fi

 # Fix permission
-#sudo chmod 755 /etc/searx/
-#sudo find /opt/yunohost/searx/ -type d -exec chmod 2755 {} \;
-#sudo find /opt/yunohost/searx/ -type f -exec chmod g+r,o+r {} \;
-#sudo chmod 644 /etc/searx/*
+#sudo find /opt/searx/ -type d -exec chmod 2755 {} \;
+#sudo find /opt/searx/ -type f -exec chmod g+r,o+r {} \;

 ## Reload Nginx and regenerate SSOwat conf
 sudo service nginx reload
 sudo service uwsgi restart
-#sudo yunohost app setting searx skipped_uris -v "/"
+if [ $is_public = "Yes" ];
+then
+    sudo yunohost app setting searx skipped_uris -v "/"
+fi
 sudo yunohost app ssowatconf
@@ -0,0 +1,26 @@
Searx was created and is maintained by Adam Tauber.

Major contributing authors:

- Adam Tauber <asciimoo@gmail.com> `@asciimoo <https://github.com/asciimoo>`_
- Matej Cotman
- Thomas Pointhuber
- Alexandre Flament

People who have submitted patches or translations, reported bugs, suggested features or
generally made searx better:

- Laszlo Hammerl
- Stefan Marsiske
- Gabor Nagy
- @pw3t
- @rhapsodhy
- András Veres-Szentkirályi
- Benjamin Sonntag
- @HLFH
- @TheRadialActive
- @Okhin
- André Koot
- Alejandro León Aznar
- rike
- dp
@@ -154,16 +154,24 @@ def score_results(results):
     # deduplication + scoring
     for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
+        res['host'] = res['parsed_url'].netloc
+
+        if res['host'].startswith('www.'):
+            res['host'] = res['host'].replace('www.', '', 1)
+
         res['engines'] = [res['engine']]
         weight = 1.0
+
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
+
         score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
+
         for new_res in results:
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
-            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
+            if res['host'] == new_res['host'] and\
                p1 == p2 and\
                res['parsed_url'].query == new_res['parsed_url'].query and\
                res.get('template') == new_res.get('template'):
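The netloc-to-host switch above means deduplication now treats www.example.com and example.com as the same host. A standalone sketch of that normalization (illustrative URLs, not part of the commit):

from urlparse import urlparse  # Python 2, matching the codebase


def normalized_host(url):
    # Same normalization as score_results: strip one leading 'www.'.
    host = urlparse(url).netloc
    if host.startswith('www.'):
        host = host.replace('www.', '', 1)
    return host

print(normalized_host('https://www.example.com/page/'))  # example.com
print(normalized_host('https://example.com/page'))       # example.com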
@@ -1,35 +1,52 @@
 #!/usr/bin/env python

 from urllib import urlencode
-from lxml import html
+#from json import loads
 from urlparse import urljoin
+from lxml import html
+from time import time

 categories = ['images']

 url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}&page={page}'
-results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]' # noqa
+results_xpath = '//div[@class="view display-item-tile"]/figure/div'

 paging = True


 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
+    params['url'] = search_url.format(query=urlencode({'text': query}),
                                       page=params['pageno'])
+    time_string = str(int(time())-3)
+    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
+    params['cookies']['xb'] = '421409'
+    params['cookies']['localization'] = 'en-us'
+    params['cookies']['flrbp'] = time_string +\
+        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
+    params['cookies']['flrbs'] = time_string +\
+        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
+    params['cookies']['flrb'] = '9'
     return params


 def response(resp):
-    global base_url
     results = []
     dom = html.fromstring(resp.text)
     for result in dom.xpath(results_xpath):
-        href = urljoin(url, result.attrib.get('href'))
-        img = result.xpath('.//img')[0]
-        title = img.attrib.get('alt', '')
-        img_src = img.attrib.get('data-defer-src')
+        img = result.xpath('.//img')
+
+        if not img:
+            continue
+
+        img = img[0]
+        img_src = 'https:'+img.attrib.get('src')
+
         if not img_src:
             continue
+
+        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
+        title = img.attrib.get('alt', '')
         results.append({'url': href,
                         'title': title,
                         'img_src': img_src,
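The reworked flickr loop above checks the .//img lookup before indexing it. The reason, in isolation: lxml's xpath() returns a plain list, so the old [0] raised IndexError on result tiles without an image. A sketch with hypothetical markup (not from the commit):

from lxml import html

# Hypothetical tile with no <img>, as served for some results.
tile = html.fromstring('<figure><div><a href="/photos/x/1"></a></div></figure>')
img = tile.xpath('.//img')      # returns [] instead of raising
if not img:
    print('no image, skip')     # the new code continues the loop
else:
    print(img[0].attrib.get('src'))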
@@ -2,6 +2,7 @@ from urlparse import urljoin
 from cgi import escape
 from urllib import quote
 from lxml import html
+from operator import itemgetter

 categories = ['videos', 'music']
|
@ -29,14 +30,27 @@ def response(resp):
|
||||||
results = []
|
results = []
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
search_res = dom.xpath('//table[@id="searchResult"]//tr')
|
search_res = dom.xpath('//table[@id="searchResult"]//tr')
|
||||||
|
|
||||||
if not search_res:
|
if not search_res:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
for result in search_res[1:]:
|
for result in search_res[1:]:
|
||||||
link = result.xpath('.//div[@class="detName"]//a')[0]
|
link = result.xpath('.//div[@class="detName"]//a')[0]
|
||||||
href = urljoin(url, link.attrib.get('href'))
|
href = urljoin(url, link.attrib.get('href'))
|
||||||
title = ' '.join(link.xpath('.//text()'))
|
title = ' '.join(link.xpath('.//text()'))
|
||||||
content = escape(' '.join(result.xpath(content_xpath)))
|
content = escape(' '.join(result.xpath(content_xpath)))
|
||||||
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
|
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
|
||||||
|
|
||||||
|
if seed.isdigit():
|
||||||
|
seed = int(seed)
|
||||||
|
else:
|
||||||
|
seed = 0
|
||||||
|
|
||||||
|
if leech.isdigit():
|
||||||
|
leech = int(leech)
|
||||||
|
else:
|
||||||
|
leech = 0
|
||||||
|
|
||||||
magnetlink = result.xpath(magnet_xpath)[0]
|
magnetlink = result.xpath(magnet_xpath)[0]
|
||||||
results.append({'url': href,
|
results.append({'url': href,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@@ -45,4 +59,5 @@ def response(resp):
                         'leech': leech,
                         'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
-    return results
+
+    return sorted(results, key=itemgetter('seed'), reverse=True)
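With itemgetter imported above, the piratebay engine now returns results ordered by seeders. A toy run of the same pattern (made-up numbers, for illustration only):

from operator import itemgetter

# Non-numeric seed counts were already coerced to 0 in response().
results = [{'title': 'a', 'seed': 3},
           {'title': 'b', 'seed': 42},
           {'title': 'c', 'seed': 0}]
for r in sorted(results, key=itemgetter('seed'), reverse=True):
    print('%s %d' % (r['title'], r['seed']))   # b 42, then a 3, then c 0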
@@ -51,7 +51,7 @@ class ViewsTestCase(SearxTestCase):
             result.data
         )
         self.assertIn(
-            '<p class="content">first <span class="highlight">test</span> content<br /></p>',
+            '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa
             result.data
         )
@@ -7,7 +7,9 @@ import re
 from random import choice

 ua_versions = ('26.0', '27.0', '28.0')
-ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua_os = ('Windows NT 6.3; WOW64',
+         'X11; Linux x86_64',
+         'X11; Linux x86')
 ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
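The widened ua_os tuple above feeds the Firefox template on the next line. A sketch of how the pieces are presumably combined into one UA string (the helper name is assumed, not shown in this diff):

from random import choice

ua_versions = ('26.0', '27.0', '28.0')
ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', 'X11; Linux x86')
ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"


def gen_useragent():
    # Assumed helper: pick one OS string and one version at random.
    return ua.format(os=choice(ua_os), version=choice(ua_versions))

print(gen_useragent())
# e.g. Mozilla/5.0 (X11; Linux x86) Gecko/20100101 Firefox/28.0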
@@ -28,7 +30,8 @@ def highlight_content(content, query):
     query = query.decode('utf-8')
     if content.lower().find(query.lower()) > -1:
         query_regex = u'({0})'.format(re.escape(query))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
@@ -37,7 +40,8 @@ def highlight_content(content, query):
         else:
             regex_parts.append(u'{0}'.format(re.escape(chunk)))
         query_regex = u'({0})'.format('|'.join(regex_parts))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)

     return content
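The wrapped re.sub calls behave exactly as before; only the line length changed. For reference, a standalone rerun of the single-match branch (illustrative strings, matching the views test above):

import re

content = u'first test content'
query = u'test'
query_regex = u'({0})'.format(re.escape(query))
print(re.sub(query_regex, '<span class="highlight">\\1</span>',
             content, flags=re.I | re.U))
# first <span class="highlight">test</span> content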