Add support for non-root URL

Adrien Beudin 2014-05-27 12:13:54 +02:00
parent 5a0f8811d5
commit fc207de1ab
10 changed files with 278 additions and 36 deletions

View File

@@ -92,10 +92,10 @@ engines:
    shortcut : sp

# +30% page load time
#  - name : ixquick
#    engine : startpage
#    base_url : 'https://www.ixquick.com/'
#    search_url : 'https://www.ixquick.com/do/search'
  - name : twitter
    engine : twitter
@@ -103,20 +103,20 @@ engines:
    shortcut : tw

# maybe in a fun category
#  - name : uncyclopedia
#    engine : mediawiki
#    categories : general
#    shortcut : unc
#    url : https://uncyclopedia.wikia.com/

# tmp suspended - too slow, too many errors
#  - name : urbandictionary
#    engine : xpath
#    search_url : http://www.urbandictionary.com/define.php?term={query}
#    url_xpath : //div[@class="word"]//a/@href
#    title_xpath : //div[@class="word"]//a
#    content_xpath : //div[@class="definition"]
#    shortcut : ud
  - name : yahoo
    engine : yahoo

conf/settings.yml-noroot (new file, +156 lines)

@@ -0,0 +1,156 @@
server:
    port : 8888
    secret_key : "ultrasecretkey" # change this!
    debug : True
    request_timeout : 2.0 # seconds
    base_url : True

engines:
  - name : wikipedia
    engine : wikipedia
    number_of_results : 1
    paging : False
    shortcut : wp

  - name : bing
    engine : bing
    locale : en-US
    shortcut : bi

  - name : bing news
    engine : bing_news
    locale : en-US
    shortcut : bin

  - name : currency
    engine : currency_convert
    categories : general
    shortcut : cc

  - name : deviantart
    engine : deviantart
    categories : images
    shortcut : da
    timeout: 3.0

  - name : ddg definitions
    engine : duckduckgo_definitions
    shortcut : ddd

  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
    shortcut : ddg

  - name : filecrop
    engine : filecrop
    categories : files
    shortcut : fc

  - name : flickr
    engine : flickr
    categories : images
    shortcut : fl
    timeout: 3.0

  - name : github
    engine : github
    categories : it
    shortcut : gh

  - name : google
    engine : google
    shortcut : go

  - name : google images
    engine : google_images
    shortcut : goi

  - name : google news
    engine : google_news
    shortcut : gon

  - name : piratebay
    engine : piratebay
    categories : videos, music, files
    shortcut : tpb

  - name : soundcloud
    engine : soundcloud
    categories : music
    shortcut : sc

  - name : stackoverflow
    engine : stackoverflow
    categories : it
    shortcut : st

  - name : startpage
    engine : startpage
    base_url : 'https://startpage.com/'
    search_url : 'https://startpage.com/do/search'
    shortcut : sp

# +30% page load time
#  - name : ixquick
#    engine : startpage
#    base_url : 'https://www.ixquick.com/'
#    search_url : 'https://www.ixquick.com/do/search'

  - name : twitter
    engine : twitter
    categories : social media
    shortcut : tw

# maybe in a fun category
#  - name : uncyclopedia
#    engine : mediawiki
#    categories : general
#    shortcut : unc
#    url : https://uncyclopedia.wikia.com/

# tmp suspended - too slow, too many errors
#  - name : urbandictionary
#    engine : xpath
#    search_url : http://www.urbandictionary.com/define.php?term={query}
#    url_xpath : //div[@class="word"]//a/@href
#    title_xpath : //div[@class="word"]//a
#    content_xpath : //div[@class="definition"]
#    shortcut : ud

  - name : yahoo
    engine : yahoo
    shortcut : yh

  - name : yahoo news
    engine : yahoo_news
    shortcut : yhn

  - name : youtube
    engine : youtube
    categories : videos
    shortcut : yt

  - name : dailymotion
    engine : dailymotion
    locale : en_US
    categories : videos
    shortcut : dm

  - name : vimeo
    engine : vimeo
    categories : videos
    results_xpath : //div[@id="browse_content"]/ol/li
    url_xpath : ./a/@href
    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
    content_xpath : ./a/img/@src
    shortcut : vm

locales:
    en : English
    de : Deutsch
    hu : Magyar
    fr : Français
    es : Español
    it : Italiano
    nl : Nederlands
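For context, base_url is what this noroot variant exists for: when searx is served under a sub-path rather than the web root, every generated link must carry that prefix. A standalone sketch of the effect (not part of the commit; example.org and the /searx/ prefix are hypothetical), in the Python 2 idiom the bundled sources use:

from urlparse import urljoin

# Hypothetical value an installer could substitute for base_url when
# the app lives under /searx/ instead of the web root.
base_url = 'https://example.org/searx/'

# Joining route names onto base_url keeps the sub-path prefix intact.
print urljoin(base_url, 'search')  # https://example.org/searx/search
print urljoin(base_url, 'about')   # https://example.org/searx/about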

View File

@@ -26,7 +26,16 @@
                },
                "example": "/searx",
                "default": "/searx"
            },
            {
                "name": "public_site",
                "ask": {
                    "en": "Is it a public Searx?"
                },
                "choices": ["Yes", "No"],
                "default": "Yes"
            }
        ]
    }
}

View File

@@ -41,7 +41,12 @@ then
fi

# Configuration Searx
if [ "$path" != "/" ];
then
    sudo cp ../conf/settings.yml-noroot /opt/searx/searx/settings.yml
else
    sudo cp ../conf/settings.yml /opt/searx/searx/
fi
sudo sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" /opt/searx/searx/settings.yml
# Set permissions to searx directory
@@ -62,13 +67,15 @@ else
fi

# Fix permission
#sudo chmod 755 /etc/searx/
#sudo chmod 644 /etc/searx/*
#sudo find /opt/searx/ -type d -exec chmod 2755 {} \;
#sudo find /opt/searx/ -type f -exec chmod g+r,o+r {} \;

## Reload Nginx and regenerate SSOwat conf
sudo service nginx reload
sudo service uwsgi restart

#sudo yunohost app setting searx skipped_uris -v "/"
if [ "$is_public" = "Yes" ];
then
    sudo yunohost app setting searx skipped_uris -v "/"
fi
sudo yunohost app ssowatconf

sources/AUTHORS.rst (new file, +26 lines)

@@ -0,0 +1,26 @@
Searx was created and is maintained by Adam Tauber.

Major contributing authors:

- Adam Tauber <asciimoo@gmail.com> `@asciimoo <https://github.com/asciimoo>`_
- Matej Cotman
- Thomas Pointhuber
- Alexandre Flament

People who have submitted patches or translations, reported bugs, suggested
features, or generally made searx better:

- Laszlo Hammerl
- Stefan Marsiske
- Gabor Nagy
- @pw3t
- @rhapsodhy
- András Veres-Szentkirályi
- Benjamin Sonntag
- @HLFH
- @TheRadialActive
- @Okhin
- André Koot
- Alejandro León Aznar
- rike
- dp

View File

@@ -154,16 +154,24 @@ def score_results(results):
    # deduplication + scoring
    for i, res in enumerate(flat_res):
        res['parsed_url'] = urlparse(res['url'])
        res['host'] = res['parsed_url'].netloc

        if res['host'].startswith('www.'):
            res['host'] = res['host'].replace('www.', '', 1)

        res['engines'] = [res['engine']]
        weight = 1.0
        if hasattr(engines[res['engine']], 'weight'):
            weight = float(engines[res['engine']].weight)
        score = int((flat_len - i) / engines_len) * weight + 1
        duplicated = False
        for new_res in results:
            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
            if res['host'] == new_res['host'] and\
               p1 == p2 and\
               res['parsed_url'].query == new_res['parsed_url'].query and\
               res.get('template') == new_res.get('template'):
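The net effect of the new host field is that www and non-www results collapse into one entry. A self-contained sketch of the same normalization (the helper name is ours, not searx's):

from urlparse import urlparse

def dedup_key(url):
    # Mirror the rules above: strip one leading 'www.' from the host
    # and one trailing slash from the path before comparing.
    parsed = urlparse(url)
    host = parsed.netloc
    if host.startswith('www.'):
        host = host.replace('www.', '', 1)
    path = parsed.path[:-1] if parsed.path.endswith('/') else parsed.path
    return (host, path, parsed.query)

# Both normalize to ('example.org', '/page', ''), so the second result
# would now be merged instead of listed twice.
print dedup_key('https://www.example.org/page/')
print dedup_key('https://example.org/page')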

View File

@@ -1,35 +1,52 @@
#!/usr/bin/env python
from urllib import urlencode
#from json import loads
from urlparse import urljoin
from lxml import html
from time import time

categories = ['images']

url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}&page={page}'
results_xpath = '//div[@class="view display-item-tile"]/figure/div'

paging = True


def request(query, params):
    params['url'] = search_url.format(query=urlencode({'text': query}),
                                      page=params['pageno'])
    time_string = str(int(time())-3)
    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
    params['cookies']['xb'] = '421409'
    params['cookies']['localization'] = 'en-us'
    params['cookies']['flrbp'] = time_string +\
        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
    params['cookies']['flrbs'] = time_string +\
        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
    params['cookies']['flrb'] = '9'
    return params


def response(resp):
    global base_url
    results = []
    dom = html.fromstring(resp.text)
    for result in dom.xpath(results_xpath):
        img = result.xpath('.//img')
        if not img:
            continue
        img = img[0]
        img_src = 'https:'+img.attrib.get('src')
        if not img_src:
            continue
        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
        title = img.attrib.get('alt', '')
        results.append({'url': href,
                        'title': title,
                        'img_src': img_src,
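To see what request() now sends, a quick usage sketch; it assumes the caller seeds params with the empty cookies dict and page number that searx normally supplies to engines:

params = request('searx', {'cookies': {}, 'pageno': 1})
print params['url']
# https://secure.flickr.com/search/?text=searx&page=1  (text=, not q=)
print params['cookies']['flrbp']
# e.g. 1401185634-3a8cdb85a427a33efda421fbda347b2eaf765a54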

View File

@@ -2,6 +2,7 @@ from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter

categories = ['videos', 'music']
@@ -29,14 +30,27 @@ def response(resp):
    results = []
    dom = html.fromstring(resp.text)
    search_res = dom.xpath('//table[@id="searchResult"]//tr')

    if not search_res:
        return results

    for result in search_res[1:]:
        link = result.xpath('.//div[@class="detName"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = ' '.join(link.xpath('.//text()'))
        content = escape(' '.join(result.xpath(content_xpath)))
        seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]

        if seed.isdigit():
            seed = int(seed)
        else:
            seed = 0

        if leech.isdigit():
            leech = int(leech)
        else:
            leech = 0

        magnetlink = result.xpath(magnet_xpath)[0]
        results.append({'url': href,
                        'title': title,
@@ -45,4 +59,5 @@ def response(resp):
                        'leech': leech,
                        'magnetlink': magnetlink.attrib['href'],
                        'template': 'torrent.html'})

    return sorted(results, key=itemgetter('seed'), reverse=True)
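The changed return statement is the user-visible part: results are now ordered by seeders instead of page order. The one-liner in isolation, with toy data:

from operator import itemgetter

results = [{'title': 'a', 'seed': 3}, {'title': 'b', 'seed': 42},
           {'title': 'c', 'seed': 0}]
# Highest seed count first: 'b' (42) leads, 'c' (0) comes last.
print sorted(results, key=itemgetter('seed'), reverse=True)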

View File

@@ -51,7 +51,7 @@ class ViewsTestCase(SearxTestCase):
            result.data
        )
        self.assertIn(
            '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa
            result.data
        )

View File

@@ -7,7 +7,9 @@ import re
from random import choice

ua_versions = ('26.0', '27.0', '28.0')
ua_os = ('Windows NT 6.3; WOW64',
         'X11; Linux x86_64',
         'X11; Linux x86')
ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
@@ -28,7 +30,8 @@ def highlight_content(content, query):
    query = query.decode('utf-8')
    if content.lower().find(query.lower()) > -1:
        query_regex = u'({0})'.format(re.escape(query))
        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                         content, flags=re.I | re.U)
    else:
        regex_parts = []
        for chunk in query.split():
@@ -37,7 +40,8 @@ def highlight_content(content, query):
            else:
                regex_parts.append(u'{0}'.format(re.escape(chunk)))
        query_regex = u'({0})'.format('|'.join(regex_parts))
        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                         content, flags=re.I | re.U)
    return content
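Wrapping the re.sub call changes nothing functionally; for reference, the highlighting it performs, as a standalone Python 2 example:

import re

content = u'first test content'
query = u'test'
query_regex = u'({0})'.format(re.escape(query))
print re.sub(query_regex, '<span class="highlight">\\1</span>',
             content, flags=re.I | re.U)
# first <span class="highlight">test</span> content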