add support for non-root URL

Adrien Beudin 2014-05-27 12:13:54 +02:00
parent 5a0f8811d5
commit fc207de1ab
10 changed files with 278 additions and 36 deletions

conf/settings.yml-noroot Normal file

@@ -0,0 +1,156 @@
server:
    port : 8888
    secret_key : "ultrasecretkey" # change this!
    debug : True
    request_timeout : 2.0 # seconds
    base_url : True

engines:
  - name : wikipedia
    engine : wikipedia
    number_of_results : 1
    paging : False
    shortcut : wp

  - name : bing
    engine : bing
    locale : en-US
    shortcut : bi

  - name : bing news
    engine : bing_news
    locale : en-US
    shortcut : bin

  - name : currency
    engine : currency_convert
    categories : general
    shortcut : cc

  - name : deviantart
    engine : deviantart
    categories : images
    shortcut : da
    timeout: 3.0

  - name : ddg definitions
    engine : duckduckgo_definitions
    shortcut : ddd

  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
    shortcut : ddg

  - name : filecrop
    engine : filecrop
    categories : files
    shortcut : fc

  - name : flickr
    engine : flickr
    categories : images
    shortcut : fl
    timeout: 3.0

  - name : github
    engine : github
    categories : it
    shortcut : gh

  - name : google
    engine : google
    shortcut : go

  - name : google images
    engine : google_images
    shortcut : goi

  - name : google news
    engine : google_news
    shortcut : gon

  - name : piratebay
    engine : piratebay
    categories : videos, music, files
    shortcut : tpb

  - name : soundcloud
    engine : soundcloud
    categories : music
    shortcut : sc

  - name : stackoverflow
    engine : stackoverflow
    categories : it
    shortcut : st

  - name : startpage
    engine : startpage
    base_url : 'https://startpage.com/'
    search_url : 'https://startpage.com/do/search'
    shortcut : sp

# +30% page load time
#  - name : ixquick
#    engine : startpage
#    base_url : 'https://www.ixquick.com/'
#    search_url : 'https://www.ixquick.com/do/search'

  - name : twitter
    engine : twitter
    categories : social media
    shortcut : tw

# maybe in a fun category
#  - name : uncyclopedia
#    engine : mediawiki
#    categories : general
#    shortcut : unc
#    url : https://uncyclopedia.wikia.com/

# tmp suspended - too slow, too many errors
#  - name : urbandictionary
#    engine : xpath
#    search_url : http://www.urbandictionary.com/define.php?term={query}
#    url_xpath : //div[@class="word"]//a/@href
#    title_xpath : //div[@class="word"]//a
#    content_xpath : //div[@class="definition"]
#    shortcut : ud

  - name : yahoo
    engine : yahoo
    shortcut : yh

  - name : yahoo news
    engine : yahoo_news
    shortcut : yhn

  - name : youtube
    engine : youtube
    categories : videos
    shortcut : yt

  - name : dailymotion
    engine : dailymotion
    locale : en_US
    categories : videos
    shortcut : dm

  - name : vimeo
    engine : vimeo
    categories : videos
    results_xpath : //div[@id="browse_content"]/ol/li
    url_xpath : ./a/@href
    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
    content_xpath : ./a/img/@src
    shortcut : vm

locales:
    en : English
    de : Deutsch
    hu : Magyar
    fr : Français
    es : Español
    it : Italiano
    nl : Nederlands
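
searx reads this file with PyYAML at startup. As a quick reference, here is a minimal sketch of loading a settings file with this layout; the path is the one used by the install script below, the rest is illustrative and not the package's own code:

# Minimal illustrative loader for a settings.yml with the layout above.
# Assumes PyYAML is installed; not the searx loader itself.
import yaml

with open('/opt/searx/searx/settings.yml') as settings_file:
    settings = yaml.safe_load(settings_file)

print(settings['server']['port'])        # 8888
print(settings['server']['base_url'])    # True in the noroot variant
for engine in settings['engines']:
    print(engine['name'], engine['shortcut'])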


@@ -26,7 +26,16 @@
             },
             "example": "/searx",
             "default": "/searx"
+            },
+            {
+                "name": "public_site",
+                "ask": {
+                    "en": "Is it a public Searx ?"
+                },
+                "choices": ["Yes", "No"],
+                "default": "Yes"
             }
         ]
     }
 }


@@ -41,7 +41,12 @@ then
 fi

 #Configuration Searx
-sudo cp ../conf/settings.yml /opt/searx/searx/
+if [ $path != "/" ];
+then
+    sudo cp ../conf/settings.yml-noroot /opt/searx/searx/settings.yml
+else
+    sudo cp ../conf/settings.yml /opt/searx/searx/
+fi
 sudo sed -i -e "s/ultrasecretkey/`openssl rand -hex 16`/g" /opt/searx/searx/settings.yml

 # Set permissions to searx directory
@@ -62,13 +67,15 @@ else
 fi

 # Fix permission
-#sudo chmod 755 /etc/searx/
-#sudo find /opt/yunohost/searx/ -type d -exec chmod 2755 {} \;
-#sudo find /opt/yunohost/searx/ -type f -exec chmod g+r,o+r {} \;
-#sudo chmod 644 /etc/searx/*
+#sudo find /opt/searx/ -type d -exec chmod 2755 {} \;
+#sudo find /opt/searx/ -type f -exec chmod g+r,o+r {} \;

 ## Reload Nginx and regenerate SSOwat conf
 sudo service nginx reload
 sudo service uwsgi restart
-#sudo yunohost app setting searx skipped_uris -v "/"
+if [ $is_public = "Yes" ];
+then
+    sudo yunohost app setting searx skipped_uris -v "/"
+fi
 sudo yunohost app ssowatconf

sources/AUTHORS.rst Normal file

@@ -0,0 +1,26 @@
Searx was created and is maintained by Adam Tauber.

Major contributing authors:

- Adam Tauber <asciimoo@gmail.com> `@asciimoo <https://github.com/asciimoo>`_
- Matej Cotman
- Thomas Pointhuber
- Alexandre Flament

People who have submitted patches/translates, reported bugs, consulted features or
generally made searx better:

- Laszlo Hammerl
- Stefan Marsiske
- Gabor Nagy
- @pw3t
- @rhapsodhy
- András Veres-Szentkirályi
- Benjamin Sonntag
- @HLFH
- @TheRadialActive
- @Okhin
- André Koot
- Alejandro León Aznar
- rike
- dp


@@ -154,16 +154,24 @@ def score_results(results):
     # deduplication + scoring
     for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
+        res['host'] = res['parsed_url'].netloc
+        if res['host'].startswith('www.'):
+            res['host'] = res['host'].replace('www.', '', 1)
         res['engines'] = [res['engine']]
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
         score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
         for new_res in results:
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
-            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
+            if res['host'] == new_res['host'] and\
                p1 == p2 and\
                res['parsed_url'].query == new_res['parsed_url'].query and\
                res.get('template') == new_res.get('template'):
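
The hunk above normalizes each result's host by stripping a leading "www." before deduplication, so the same page reached via www and non-www hostnames is merged. A standalone sketch of that comparison (hypothetical URLs; not the searx function itself):

# Illustrative sketch of the www-stripping comparison; not searx code.
from urlparse import urlparse  # Python 2, matching the codebase above

def normalized_host(url):
    host = urlparse(url).netloc
    # Strip a single leading "www." so both variants compare equal.
    if host.startswith('www.'):
        host = host.replace('www.', '', 1)
    return host

print(normalized_host('https://www.example.com/page'))  # example.com
print(normalized_host('https://example.com/page'))      # example.com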


@@ -1,35 +1,52 @@
 #!/usr/bin/env python
 from urllib import urlencode
-from lxml import html
+#from json import loads
 from urlparse import urljoin
+from lxml import html
+from time import time

 categories = ['images']

 url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}&page={page}'
-results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa
+results_xpath = '//div[@class="view display-item-tile"]/figure/div'

 paging = True

 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
+    params['url'] = search_url.format(query=urlencode({'text': query}),
                                       page=params['pageno'])
+    time_string = str(int(time())-3)
+    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
+    params['cookies']['xb'] = '421409'
+    params['cookies']['localization'] = 'en-us'
+    params['cookies']['flrbp'] = time_string +\
+        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
+    params['cookies']['flrbs'] = time_string +\
+        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
+    params['cookies']['flrb'] = '9'
     return params

 def response(resp):
+    global base_url
     results = []
     dom = html.fromstring(resp.text)
     for result in dom.xpath(results_xpath):
-        href = urljoin(url, result.attrib.get('href'))
-        img = result.xpath('.//img')[0]
-        title = img.attrib.get('alt', '')
-        img_src = img.attrib.get('data-defer-src')
+        img = result.xpath('.//img')
+        if not img:
+            continue
+        img = img[0]
+        img_src = 'https:'+img.attrib.get('src')
         if not img_src:
             continue
+        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
+        title = img.attrib.get('alt', '')
         results.append({'url': href,
                         'title': title,
                         'img_src': img_src,


@@ -2,6 +2,7 @@ from urlparse import urljoin
 from cgi import escape
 from urllib import quote
 from lxml import html
+from operator import itemgetter

 categories = ['videos', 'music']
@@ -29,14 +30,27 @@ def response(resp):
     results = []
     dom = html.fromstring(resp.text)
     search_res = dom.xpath('//table[@id="searchResult"]//tr')
     if not search_res:
         return results
     for result in search_res[1:]:
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = ' '.join(link.xpath('.//text()'))
         content = escape(' '.join(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
+        if seed.isdigit():
+            seed = int(seed)
+        else:
+            seed = 0
+        if leech.isdigit():
+            leech = int(leech)
+        else:
+            leech = 0
         magnetlink = result.xpath(magnet_xpath)[0]
         results.append({'url': href,
                         'title': title,
@@ -45,4 +59,5 @@ def response(resp):
                         'leech': leech,
                         'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
-    return results
+    return sorted(results, key=itemgetter('seed'), reverse=True)
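
The engine above now converts the scraped seeder/leecher cells to integers (falling back to 0) and returns the list sorted by seeders. A self-contained sketch of the same pattern (made-up data; not the engine code):

# Illustrative: coerce scraped counts to int and sort by seeders; not searx code.
from operator import itemgetter

def to_int(value):
    # Scraped cells are strings and may not be numeric.
    return int(value) if value.isdigit() else 0

raw = [{'title': 'a', 'seed': '12'}, {'title': 'b', 'seed': 'n/a'}, {'title': 'c', 'seed': '7'}]
results = [dict(item, seed=to_int(item['seed'])) for item in raw]
print(sorted(results, key=itemgetter('seed'), reverse=True))
# 'a' (12) first, then 'c' (7), then 'b' (0)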


@@ -51,7 +51,7 @@ class ViewsTestCase(SearxTestCase):
             result.data
         )
         self.assertIn(
-            '<p class="content">first <span class="highlight">test</span> content<br /></p>',
+            '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa
             result.data
         )


@@ -7,7 +7,9 @@ import re
 from random import choice

 ua_versions = ('26.0', '27.0', '28.0')
-ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua_os = ('Windows NT 6.3; WOW64',
+         'X11; Linux x86_64',
+         'X11; Linux x86')

 ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
@@ -28,7 +30,8 @@ def highlight_content(content, query):
     query = query.decode('utf-8')
     if content.lower().find(query.lower()) > -1:
         query_regex = u'({0})'.format(re.escape(query))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
@@ -37,7 +40,8 @@ def highlight_content(content, query):
             else:
                 regex_parts.append(u'{0}'.format(re.escape(chunk)))
         query_regex = u'({0})'.format('|'.join(regex_parts))
-        content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U)
+        content = re.sub(query_regex, '<span class="highlight">\\1</span>',
+                         content, flags=re.I | re.U)

     return content
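
The two re.sub calls above are only re-wrapped to satisfy line-length checks; behaviour is unchanged. For context, a tiny standalone example of the same highlighting idea (illustrative; not the searx helper):

# Illustrative sketch of regex-based query highlighting; not searx code.
import re

def highlight(content, query):
    # Wrap each case-insensitive occurrence of the query in a span.
    pattern = u'({0})'.format(re.escape(query))
    return re.sub(pattern, '<span class="highlight">\\1</span>',
                  content, flags=re.I | re.U)

print(highlight('First test content', 'test'))
# First <span class="highlight">test</span> content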