upgrade searx to version v0.10 (#4)
* remove upstream source files * upgrade searx to version v0.10
This commit is contained in:
parent
ec080cbcc4
commit
d07267b8a8
|
@ -56,6 +56,8 @@ if [ ! -d $final_path ];
|
||||||
then
|
then
|
||||||
sudo mkdir -p $final_path
|
sudo mkdir -p $final_path
|
||||||
fi
|
fi
|
||||||
|
wget https://github.com/asciimoo/searx/archive/v0.10.0.tar.gz
|
||||||
|
tar xvf v0.10.0.tar.gz && cp -rf searx-0.10.0/* ../sources
|
||||||
sudo cp -r ../sources/* $final_path
|
sudo cp -r ../sources/* $final_path
|
||||||
sudo virtualenv --system-site-packages $final_path
|
sudo virtualenv --system-site-packages $final_path
|
||||||
sudo bash -c "source $final_path/bin/activate && pip install -r $final_path/requirements-ynh.txt"
|
sudo bash -c "source $final_path/bin/activate && pip install -r $final_path/requirements-ynh.txt"
|
||||||
|
|
|
@ -37,7 +37,9 @@ then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
final_path=/opt/yunohost/$app
|
final_path=/opt/yunohost/$app
|
||||||
sudo cp -r ../sources/* $final_path
|
wget https://github.com/asciimoo/searx/archive/v0.10.0.tar.gz
|
||||||
|
tar xvf v0.10.0.tar.gz && cp -rf searx-0.10.0/* ../sources
|
||||||
|
sudo cp -rf ../sources/* $final_path
|
||||||
sudo bash -c "source $final_path/bin/activate && pip install -r $final_path/requirements-ynh.txt --upgrade"
|
sudo bash -c "source $final_path/bin/activate && pip install -r $final_path/requirements-ynh.txt --upgrade"
|
||||||
|
|
||||||
# Disable swapfile
|
# Disable swapfile
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
[run]
|
|
||||||
branch = True
|
|
||||||
source =
|
|
||||||
searx/engines
|
|
||||||
searx/__init__.py
|
|
||||||
searx/autocomplete.py
|
|
||||||
searx/https_rewrite.py
|
|
||||||
searx/languages.py
|
|
||||||
searx/search.py
|
|
||||||
searx/testing.py
|
|
||||||
searx/utils.py
|
|
||||||
searx/webapp.py
|
|
||||||
|
|
||||||
[report]
|
|
||||||
show_missing = True
|
|
||||||
exclude_lines =
|
|
||||||
if __name__ == .__main__.:
|
|
||||||
|
|
||||||
[html]
|
|
||||||
directory = coverage
|
|
|
@ -1,17 +0,0 @@
|
||||||
.coverage
|
|
||||||
.installed.cfg
|
|
||||||
engines.cfg
|
|
||||||
env
|
|
||||||
robot_log.html
|
|
||||||
robot_output.xml
|
|
||||||
robot_report.html
|
|
||||||
test_basic/
|
|
||||||
setup.cfg
|
|
||||||
|
|
||||||
*.pyc
|
|
||||||
*/*.pyc
|
|
||||||
*~
|
|
||||||
|
|
||||||
node_modules/
|
|
||||||
|
|
||||||
.tx/
|
|
|
@ -1,3 +0,0 @@
|
||||||
strictness: high
|
|
||||||
ignore-paths:
|
|
||||||
- bootstrap.py
|
|
|
@ -1,32 +0,0 @@
|
||||||
sudo: false
|
|
||||||
cache:
|
|
||||||
- pip
|
|
||||||
- npm
|
|
||||||
- directories:
|
|
||||||
- $HOME/.cache/pip
|
|
||||||
language: python
|
|
||||||
python:
|
|
||||||
- "2.7"
|
|
||||||
before_install:
|
|
||||||
- "export DISPLAY=:99.0"
|
|
||||||
- "sh -e /etc/init.d/xvfb start"
|
|
||||||
- npm install less grunt-cli
|
|
||||||
- ( cd searx/static/themes/oscar;npm install; cd - )
|
|
||||||
install:
|
|
||||||
- ./manage.sh update_dev_packages
|
|
||||||
- pip install coveralls
|
|
||||||
script:
|
|
||||||
- ./manage.sh pep8_check
|
|
||||||
- ./manage.sh styles
|
|
||||||
- ./manage.sh grunt_build
|
|
||||||
- ./manage.sh py_test_coverage
|
|
||||||
- ./manage.sh robot_tests
|
|
||||||
after_success:
|
|
||||||
coveralls
|
|
||||||
notifications:
|
|
||||||
irc:
|
|
||||||
channels:
|
|
||||||
- "irc.freenode.org#searx"
|
|
||||||
template:
|
|
||||||
- "%{repository}/#%{build_number}/%{branch} (%{author}): %{message} %{build_url}"
|
|
||||||
on_success: change
|
|
|
@ -1,53 +0,0 @@
|
||||||
Searx was created by Adam Tauber and is maintained by Adam Tauber and Alexandre Flament.
|
|
||||||
|
|
||||||
Major contributing authors:
|
|
||||||
|
|
||||||
- Adam Tauber <asciimoo@gmail.com> `@asciimoo <https://github.com/asciimoo>`_
|
|
||||||
- Matej Cotman
|
|
||||||
- Thomas Pointhuber
|
|
||||||
- Alexandre Flament `@dalf <https://github.com/dalf>`_
|
|
||||||
- @Cqoicebordel
|
|
||||||
|
|
||||||
People who have submitted patches/translates, reported bugs, consulted features or
|
|
||||||
generally made searx better:
|
|
||||||
|
|
||||||
- Laszlo Hammerl
|
|
||||||
- Stefan Marsiske
|
|
||||||
- Gabor Nagy
|
|
||||||
- @pw3t
|
|
||||||
- @rhapsodhy
|
|
||||||
- András Veres-Szentkirályi
|
|
||||||
- Benjamin Sonntag
|
|
||||||
- @HLFH
|
|
||||||
- @TheRadialActive
|
|
||||||
- @Okhin
|
|
||||||
- André Koot
|
|
||||||
- Alejandro León Aznar
|
|
||||||
- rike
|
|
||||||
- dp
|
|
||||||
- Martin Zimmermann
|
|
||||||
- @courgette
|
|
||||||
- @kernc
|
|
||||||
- @Reventl0v
|
|
||||||
- Caner Başaran
|
|
||||||
- Benjamin Sonntag
|
|
||||||
- @opi
|
|
||||||
- @dimqua
|
|
||||||
- Giorgos Logiotatidis
|
|
||||||
- Luc Didry
|
|
||||||
- Niklas Haas
|
|
||||||
- @underr
|
|
||||||
- Emmanuel Benazera
|
|
||||||
- @GreenLunar
|
|
||||||
- Noemi Vanyi
|
|
||||||
- Kang-min Liu
|
|
||||||
- Kirill Isakov
|
|
||||||
- Guilhem Bonnefille
|
|
||||||
- Marc Abonce Seguin
|
|
||||||
|
|
||||||
- @jibe-b
|
|
||||||
- Christian Pietsch @pietsch
|
|
||||||
- @Maxqia
|
|
||||||
- Ashutosh Das @pyprism
|
|
||||||
- YuLun Shih @imZack
|
|
||||||
- Dmitry Mikhirev @mikhirev
|
|
|
@ -1,164 +0,0 @@
|
||||||
0.9.0 2016.05.24
|
|
||||||
================
|
|
||||||
|
|
||||||
- New search category: science
|
|
||||||
- New engines
|
|
||||||
|
|
||||||
- Wolframalpha (science)
|
|
||||||
- Frinkiac (images)
|
|
||||||
- Arch Linux (it)
|
|
||||||
- BASE - Bielefeld Academic Search Engine (science)
|
|
||||||
- Dokuwiki (general)
|
|
||||||
- Nyaa.se (files, images, music, video)
|
|
||||||
- Reddit (general, images, news, social media)
|
|
||||||
- Torrentz.eu (files, music, video)
|
|
||||||
- Tokyo Toshokan (files, music, video)
|
|
||||||
- F-Droid (files)
|
|
||||||
- Erowid (general)
|
|
||||||
- Bitbucket (it)
|
|
||||||
- GitLab (it)
|
|
||||||
- Geektimes (it)
|
|
||||||
- Habrahabr (it)
|
|
||||||
- New plugins
|
|
||||||
|
|
||||||
- Open links in new tab
|
|
||||||
- Vim hotkeys for better navigation
|
|
||||||
- Wikipedia/Mediawiki engine improvements
|
|
||||||
- Configurable instance name
|
|
||||||
- Configurable connection pool size
|
|
||||||
- Fixed broken google engine
|
|
||||||
- Better docker image
|
|
||||||
- Images in standard results
|
|
||||||
- Fixed and refactored user settings (Warning: backward incompatibility - you have to reset your custom engine preferences)
|
|
||||||
- Suspending engines on errors
|
|
||||||
- Simplified development/deployment tooling
|
|
||||||
- Translation updates
|
|
||||||
- Multilingual autocompleter
|
|
||||||
- Qwant autocompleter backend
|
|
||||||
|
|
||||||
0.8.1 2015.12.22
|
|
||||||
================
|
|
||||||
|
|
||||||
- More efficient result parsing
|
|
||||||
- Rewritten google engine to prevent app crashes
|
|
||||||
- Other engine fixes/tweaks
|
|
||||||
|
|
||||||
- Bing news
|
|
||||||
- Btdigg
|
|
||||||
- Gigablast
|
|
||||||
- Google images
|
|
||||||
- Startpage
|
|
||||||
|
|
||||||
|
|
||||||
News
|
|
||||||
~~~~
|
|
||||||
|
|
||||||
New documentation page is available: https://asciimoo.github.io/searx
|
|
||||||
|
|
||||||
|
|
||||||
0.8.0 2015.09.08
|
|
||||||
================
|
|
||||||
|
|
||||||
- New engines
|
|
||||||
|
|
||||||
- Blekko (image)
|
|
||||||
- Gigablast (general)
|
|
||||||
- Spotify (music)
|
|
||||||
- Swisscows (general, images)
|
|
||||||
- Qwant (general, images, news, social media)
|
|
||||||
- Plugin system
|
|
||||||
- New plugins
|
|
||||||
|
|
||||||
- HTTPS rewrite
|
|
||||||
- Search on cagetory select
|
|
||||||
- User information
|
|
||||||
- Tracker url part remover
|
|
||||||
- Multiple outgoing IP and HTTP/HTTPS proxy support
|
|
||||||
- New autocompleter: startpage
|
|
||||||
- New theme: pix-art
|
|
||||||
- Settings file structure change
|
|
||||||
- Fabfile, docker deployment
|
|
||||||
- Optional safesearch result filter
|
|
||||||
- Force HTTPS in engines if possible
|
|
||||||
- Disabled HTTP referrer on outgoing links
|
|
||||||
- Display cookie information
|
|
||||||
- Prettier search URLs
|
|
||||||
- Right-to-left text handling in themes
|
|
||||||
- Translation updates (New locales: Chinese, Hebrew, Portuguese, Romanian)
|
|
||||||
|
|
||||||
|
|
||||||
New dependencies
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
- pyopenssl
|
|
||||||
- ndg-httpsclient
|
|
||||||
- pyasn1
|
|
||||||
- pyasn1-modules
|
|
||||||
- certifi
|
|
||||||
|
|
||||||
|
|
||||||
News
|
|
||||||
~~~~
|
|
||||||
|
|
||||||
@dalf joined the maintainer "team"
|
|
||||||
|
|
||||||
|
|
||||||
0.7.0 2015.02.03
|
|
||||||
================
|
|
||||||
|
|
||||||
- New engines
|
|
||||||
|
|
||||||
- Digg
|
|
||||||
- Google Play Store
|
|
||||||
- Deezer
|
|
||||||
- Btdigg
|
|
||||||
- Mixcloud
|
|
||||||
- 1px
|
|
||||||
- Image proxy
|
|
||||||
- Search speed improvements
|
|
||||||
- Autocompletition of engines, shortcuts and supported languages
|
|
||||||
- Translation updates (New locales: Turkish, Russian)
|
|
||||||
- Default theme changed to oscar
|
|
||||||
- Settings option to disable engines by default
|
|
||||||
- UI code cleanup and restructure
|
|
||||||
- Engine tests
|
|
||||||
- Multiple engine bug fixes and tweaks
|
|
||||||
- Config option to set default interface locale
|
|
||||||
- Flexible result template handling
|
|
||||||
- Application logging and sophisticated engine exception tracebacks
|
|
||||||
- Kickass torrent size display (oscar theme)
|
|
||||||
|
|
||||||
|
|
||||||
New dependencies
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
- pygments - http://pygments.org/
|
|
||||||
|
|
||||||
|
|
||||||
0.6.0 - 2014.12.25
|
|
||||||
==================
|
|
||||||
|
|
||||||
- Changelog added
|
|
||||||
- New engines
|
|
||||||
|
|
||||||
- Flickr (api)
|
|
||||||
- Subtitleseeker
|
|
||||||
- photon
|
|
||||||
- 500px
|
|
||||||
- Searchcode
|
|
||||||
- Searchcode doc
|
|
||||||
- Kickass torrent
|
|
||||||
- Precise search request timeout handling
|
|
||||||
- Better favicon support
|
|
||||||
- Stricter config parsing
|
|
||||||
- Translation updates
|
|
||||||
- Multiple ui fixes
|
|
||||||
- Flickr (noapi) engine fix
|
|
||||||
- Pep8 fixes
|
|
||||||
|
|
||||||
|
|
||||||
News
|
|
||||||
~~~~
|
|
||||||
|
|
||||||
Health status of searx instances and engines: http://stats.searx.oe5tpo.com
|
|
||||||
(source: https://github.com/pointhi/searx_stats)
|
|
|
@ -1,54 +0,0 @@
|
||||||
FROM alpine:3.3
|
|
||||||
MAINTAINER searx <https://github.com/asciimoo/searx>
|
|
||||||
LABEL description "A privacy-respecting, hackable metasearch engine."
|
|
||||||
|
|
||||||
ENV BASE_URL=False IMAGE_PROXY=False
|
|
||||||
EXPOSE 8888
|
|
||||||
WORKDIR /usr/local/searx
|
|
||||||
CMD ["/usr/bin/tini","--","/usr/local/searx/run.sh"]
|
|
||||||
|
|
||||||
RUN adduser -D -h /usr/local/searx -s /bin/sh searx searx \
|
|
||||||
&& echo '#!/bin/sh' >> run.sh \
|
|
||||||
&& echo 'sed -i "s|base_url : False|base_url : $BASE_URL|g" searx/settings.yml' >> run.sh \
|
|
||||||
&& echo 'sed -i "s/image_proxy : False/image_proxy : $IMAGE_PROXY/g" searx/settings.yml' >> run.sh \
|
|
||||||
&& echo 'sed -i "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml' >> run.sh \
|
|
||||||
&& echo 'python searx/webapp.py' >> run.sh \
|
|
||||||
&& chmod +x run.sh
|
|
||||||
|
|
||||||
COPY requirements.txt ./requirements.txt
|
|
||||||
|
|
||||||
RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories \
|
|
||||||
&& apk -U add \
|
|
||||||
build-base \
|
|
||||||
python \
|
|
||||||
python-dev \
|
|
||||||
py-pip \
|
|
||||||
libxml2 \
|
|
||||||
libxml2-dev \
|
|
||||||
libxslt \
|
|
||||||
libxslt-dev \
|
|
||||||
libffi-dev \
|
|
||||||
openssl \
|
|
||||||
openssl-dev \
|
|
||||||
ca-certificates \
|
|
||||||
tini@commuedge \
|
|
||||||
&& pip install --no-cache -r requirements.txt \
|
|
||||||
&& apk del \
|
|
||||||
build-base \
|
|
||||||
python-dev \
|
|
||||||
py-pip\
|
|
||||||
libffi-dev \
|
|
||||||
openssl-dev \
|
|
||||||
libxslt-dev \
|
|
||||||
libxml2-dev \
|
|
||||||
openssl-dev \
|
|
||||||
ca-certificates \
|
|
||||||
&& rm -f /var/cache/apk/*
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
RUN chown -R searx:searx *
|
|
||||||
|
|
||||||
USER searx
|
|
||||||
|
|
||||||
RUN sed -i "s/127.0.0.1/0.0.0.0/g" searx/settings.yml
|
|
661
sources/LICENSE
661
sources/LICENSE
|
@ -1,661 +0,0 @@
|
||||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
|
||||||
Version 3, 19 November 2007
|
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
|
||||||
of this license document, but changing it is not allowed.
|
|
||||||
|
|
||||||
Preamble
|
|
||||||
|
|
||||||
The GNU Affero General Public License is a free, copyleft license for
|
|
||||||
software and other kinds of works, specifically designed to ensure
|
|
||||||
cooperation with the community in the case of network server software.
|
|
||||||
|
|
||||||
The licenses for most software and other practical works are designed
|
|
||||||
to take away your freedom to share and change the works. By contrast,
|
|
||||||
our General Public Licenses are intended to guarantee your freedom to
|
|
||||||
share and change all versions of a program--to make sure it remains free
|
|
||||||
software for all its users.
|
|
||||||
|
|
||||||
When we speak of free software, we are referring to freedom, not
|
|
||||||
price. Our General Public Licenses are designed to make sure that you
|
|
||||||
have the freedom to distribute copies of free software (and charge for
|
|
||||||
them if you wish), that you receive source code or can get it if you
|
|
||||||
want it, that you can change the software or use pieces of it in new
|
|
||||||
free programs, and that you know you can do these things.
|
|
||||||
|
|
||||||
Developers that use our General Public Licenses protect your rights
|
|
||||||
with two steps: (1) assert copyright on the software, and (2) offer
|
|
||||||
you this License which gives you legal permission to copy, distribute
|
|
||||||
and/or modify the software.
|
|
||||||
|
|
||||||
A secondary benefit of defending all users' freedom is that
|
|
||||||
improvements made in alternate versions of the program, if they
|
|
||||||
receive widespread use, become available for other developers to
|
|
||||||
incorporate. Many developers of free software are heartened and
|
|
||||||
encouraged by the resulting cooperation. However, in the case of
|
|
||||||
software used on network servers, this result may fail to come about.
|
|
||||||
The GNU General Public License permits making a modified version and
|
|
||||||
letting the public access it on a server without ever releasing its
|
|
||||||
source code to the public.
|
|
||||||
|
|
||||||
The GNU Affero General Public License is designed specifically to
|
|
||||||
ensure that, in such cases, the modified source code becomes available
|
|
||||||
to the community. It requires the operator of a network server to
|
|
||||||
provide the source code of the modified version running there to the
|
|
||||||
users of that server. Therefore, public use of a modified version, on
|
|
||||||
a publicly accessible server, gives the public access to the source
|
|
||||||
code of the modified version.
|
|
||||||
|
|
||||||
An older license, called the Affero General Public License and
|
|
||||||
published by Affero, was designed to accomplish similar goals. This is
|
|
||||||
a different license, not a version of the Affero GPL, but Affero has
|
|
||||||
released a new version of the Affero GPL which permits relicensing under
|
|
||||||
this license.
|
|
||||||
|
|
||||||
The precise terms and conditions for copying, distribution and
|
|
||||||
modification follow.
|
|
||||||
|
|
||||||
TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
0. Definitions.
|
|
||||||
|
|
||||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
|
||||||
|
|
||||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
|
||||||
works, such as semiconductor masks.
|
|
||||||
|
|
||||||
"The Program" refers to any copyrightable work licensed under this
|
|
||||||
License. Each licensee is addressed as "you". "Licensees" and
|
|
||||||
"recipients" may be individuals or organizations.
|
|
||||||
|
|
||||||
To "modify" a work means to copy from or adapt all or part of the work
|
|
||||||
in a fashion requiring copyright permission, other than the making of an
|
|
||||||
exact copy. The resulting work is called a "modified version" of the
|
|
||||||
earlier work or a work "based on" the earlier work.
|
|
||||||
|
|
||||||
A "covered work" means either the unmodified Program or a work based
|
|
||||||
on the Program.
|
|
||||||
|
|
||||||
To "propagate" a work means to do anything with it that, without
|
|
||||||
permission, would make you directly or secondarily liable for
|
|
||||||
infringement under applicable copyright law, except executing it on a
|
|
||||||
computer or modifying a private copy. Propagation includes copying,
|
|
||||||
distribution (with or without modification), making available to the
|
|
||||||
public, and in some countries other activities as well.
|
|
||||||
|
|
||||||
To "convey" a work means any kind of propagation that enables other
|
|
||||||
parties to make or receive copies. Mere interaction with a user through
|
|
||||||
a computer network, with no transfer of a copy, is not conveying.
|
|
||||||
|
|
||||||
An interactive user interface displays "Appropriate Legal Notices"
|
|
||||||
to the extent that it includes a convenient and prominently visible
|
|
||||||
feature that (1) displays an appropriate copyright notice, and (2)
|
|
||||||
tells the user that there is no warranty for the work (except to the
|
|
||||||
extent that warranties are provided), that licensees may convey the
|
|
||||||
work under this License, and how to view a copy of this License. If
|
|
||||||
the interface presents a list of user commands or options, such as a
|
|
||||||
menu, a prominent item in the list meets this criterion.
|
|
||||||
|
|
||||||
1. Source Code.
|
|
||||||
|
|
||||||
The "source code" for a work means the preferred form of the work
|
|
||||||
for making modifications to it. "Object code" means any non-source
|
|
||||||
form of a work.
|
|
||||||
|
|
||||||
A "Standard Interface" means an interface that either is an official
|
|
||||||
standard defined by a recognized standards body, or, in the case of
|
|
||||||
interfaces specified for a particular programming language, one that
|
|
||||||
is widely used among developers working in that language.
|
|
||||||
|
|
||||||
The "System Libraries" of an executable work include anything, other
|
|
||||||
than the work as a whole, that (a) is included in the normal form of
|
|
||||||
packaging a Major Component, but which is not part of that Major
|
|
||||||
Component, and (b) serves only to enable use of the work with that
|
|
||||||
Major Component, or to implement a Standard Interface for which an
|
|
||||||
implementation is available to the public in source code form. A
|
|
||||||
"Major Component", in this context, means a major essential component
|
|
||||||
(kernel, window system, and so on) of the specific operating system
|
|
||||||
(if any) on which the executable work runs, or a compiler used to
|
|
||||||
produce the work, or an object code interpreter used to run it.
|
|
||||||
|
|
||||||
The "Corresponding Source" for a work in object code form means all
|
|
||||||
the source code needed to generate, install, and (for an executable
|
|
||||||
work) run the object code and to modify the work, including scripts to
|
|
||||||
control those activities. However, it does not include the work's
|
|
||||||
System Libraries, or general-purpose tools or generally available free
|
|
||||||
programs which are used unmodified in performing those activities but
|
|
||||||
which are not part of the work. For example, Corresponding Source
|
|
||||||
includes interface definition files associated with source files for
|
|
||||||
the work, and the source code for shared libraries and dynamically
|
|
||||||
linked subprograms that the work is specifically designed to require,
|
|
||||||
such as by intimate data communication or control flow between those
|
|
||||||
subprograms and other parts of the work.
|
|
||||||
|
|
||||||
The Corresponding Source need not include anything that users
|
|
||||||
can regenerate automatically from other parts of the Corresponding
|
|
||||||
Source.
|
|
||||||
|
|
||||||
The Corresponding Source for a work in source code form is that
|
|
||||||
same work.
|
|
||||||
|
|
||||||
2. Basic Permissions.
|
|
||||||
|
|
||||||
All rights granted under this License are granted for the term of
|
|
||||||
copyright on the Program, and are irrevocable provided the stated
|
|
||||||
conditions are met. This License explicitly affirms your unlimited
|
|
||||||
permission to run the unmodified Program. The output from running a
|
|
||||||
covered work is covered by this License only if the output, given its
|
|
||||||
content, constitutes a covered work. This License acknowledges your
|
|
||||||
rights of fair use or other equivalent, as provided by copyright law.
|
|
||||||
|
|
||||||
You may make, run and propagate covered works that you do not
|
|
||||||
convey, without conditions so long as your license otherwise remains
|
|
||||||
in force. You may convey covered works to others for the sole purpose
|
|
||||||
of having them make modifications exclusively for you, or provide you
|
|
||||||
with facilities for running those works, provided that you comply with
|
|
||||||
the terms of this License in conveying all material for which you do
|
|
||||||
not control copyright. Those thus making or running the covered works
|
|
||||||
for you must do so exclusively on your behalf, under your direction
|
|
||||||
and control, on terms that prohibit them from making any copies of
|
|
||||||
your copyrighted material outside their relationship with you.
|
|
||||||
|
|
||||||
Conveying under any other circumstances is permitted solely under
|
|
||||||
the conditions stated below. Sublicensing is not allowed; section 10
|
|
||||||
makes it unnecessary.
|
|
||||||
|
|
||||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
|
||||||
|
|
||||||
No covered work shall be deemed part of an effective technological
|
|
||||||
measure under any applicable law fulfilling obligations under article
|
|
||||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
|
||||||
similar laws prohibiting or restricting circumvention of such
|
|
||||||
measures.
|
|
||||||
|
|
||||||
When you convey a covered work, you waive any legal power to forbid
|
|
||||||
circumvention of technological measures to the extent such circumvention
|
|
||||||
is effected by exercising rights under this License with respect to
|
|
||||||
the covered work, and you disclaim any intention to limit operation or
|
|
||||||
modification of the work as a means of enforcing, against the work's
|
|
||||||
users, your or third parties' legal rights to forbid circumvention of
|
|
||||||
technological measures.
|
|
||||||
|
|
||||||
4. Conveying Verbatim Copies.
|
|
||||||
|
|
||||||
You may convey verbatim copies of the Program's source code as you
|
|
||||||
receive it, in any medium, provided that you conspicuously and
|
|
||||||
appropriately publish on each copy an appropriate copyright notice;
|
|
||||||
keep intact all notices stating that this License and any
|
|
||||||
non-permissive terms added in accord with section 7 apply to the code;
|
|
||||||
keep intact all notices of the absence of any warranty; and give all
|
|
||||||
recipients a copy of this License along with the Program.
|
|
||||||
|
|
||||||
You may charge any price or no price for each copy that you convey,
|
|
||||||
and you may offer support or warranty protection for a fee.
|
|
||||||
|
|
||||||
5. Conveying Modified Source Versions.
|
|
||||||
|
|
||||||
You may convey a work based on the Program, or the modifications to
|
|
||||||
produce it from the Program, in the form of source code under the
|
|
||||||
terms of section 4, provided that you also meet all of these conditions:
|
|
||||||
|
|
||||||
a) The work must carry prominent notices stating that you modified
|
|
||||||
it, and giving a relevant date.
|
|
||||||
|
|
||||||
b) The work must carry prominent notices stating that it is
|
|
||||||
released under this License and any conditions added under section
|
|
||||||
7. This requirement modifies the requirement in section 4 to
|
|
||||||
"keep intact all notices".
|
|
||||||
|
|
||||||
c) You must license the entire work, as a whole, under this
|
|
||||||
License to anyone who comes into possession of a copy. This
|
|
||||||
License will therefore apply, along with any applicable section 7
|
|
||||||
additional terms, to the whole of the work, and all its parts,
|
|
||||||
regardless of how they are packaged. This License gives no
|
|
||||||
permission to license the work in any other way, but it does not
|
|
||||||
invalidate such permission if you have separately received it.
|
|
||||||
|
|
||||||
d) If the work has interactive user interfaces, each must display
|
|
||||||
Appropriate Legal Notices; however, if the Program has interactive
|
|
||||||
interfaces that do not display Appropriate Legal Notices, your
|
|
||||||
work need not make them do so.
|
|
||||||
|
|
||||||
A compilation of a covered work with other separate and independent
|
|
||||||
works, which are not by their nature extensions of the covered work,
|
|
||||||
and which are not combined with it such as to form a larger program,
|
|
||||||
in or on a volume of a storage or distribution medium, is called an
|
|
||||||
"aggregate" if the compilation and its resulting copyright are not
|
|
||||||
used to limit the access or legal rights of the compilation's users
|
|
||||||
beyond what the individual works permit. Inclusion of a covered work
|
|
||||||
in an aggregate does not cause this License to apply to the other
|
|
||||||
parts of the aggregate.
|
|
||||||
|
|
||||||
6. Conveying Non-Source Forms.
|
|
||||||
|
|
||||||
You may convey a covered work in object code form under the terms
|
|
||||||
of sections 4 and 5, provided that you also convey the
|
|
||||||
machine-readable Corresponding Source under the terms of this License,
|
|
||||||
in one of these ways:
|
|
||||||
|
|
||||||
a) Convey the object code in, or embodied in, a physical product
|
|
||||||
(including a physical distribution medium), accompanied by the
|
|
||||||
Corresponding Source fixed on a durable physical medium
|
|
||||||
customarily used for software interchange.
|
|
||||||
|
|
||||||
b) Convey the object code in, or embodied in, a physical product
|
|
||||||
(including a physical distribution medium), accompanied by a
|
|
||||||
written offer, valid for at least three years and valid for as
|
|
||||||
long as you offer spare parts or customer support for that product
|
|
||||||
model, to give anyone who possesses the object code either (1) a
|
|
||||||
copy of the Corresponding Source for all the software in the
|
|
||||||
product that is covered by this License, on a durable physical
|
|
||||||
medium customarily used for software interchange, for a price no
|
|
||||||
more than your reasonable cost of physically performing this
|
|
||||||
conveying of source, or (2) access to copy the
|
|
||||||
Corresponding Source from a network server at no charge.
|
|
||||||
|
|
||||||
c) Convey individual copies of the object code with a copy of the
|
|
||||||
written offer to provide the Corresponding Source. This
|
|
||||||
alternative is allowed only occasionally and noncommercially, and
|
|
||||||
only if you received the object code with such an offer, in accord
|
|
||||||
with subsection 6b.
|
|
||||||
|
|
||||||
d) Convey the object code by offering access from a designated
|
|
||||||
place (gratis or for a charge), and offer equivalent access to the
|
|
||||||
Corresponding Source in the same way through the same place at no
|
|
||||||
further charge. You need not require recipients to copy the
|
|
||||||
Corresponding Source along with the object code. If the place to
|
|
||||||
copy the object code is a network server, the Corresponding Source
|
|
||||||
may be on a different server (operated by you or a third party)
|
|
||||||
that supports equivalent copying facilities, provided you maintain
|
|
||||||
clear directions next to the object code saying where to find the
|
|
||||||
Corresponding Source. Regardless of what server hosts the
|
|
||||||
Corresponding Source, you remain obligated to ensure that it is
|
|
||||||
available for as long as needed to satisfy these requirements.
|
|
||||||
|
|
||||||
e) Convey the object code using peer-to-peer transmission, provided
|
|
||||||
you inform other peers where the object code and Corresponding
|
|
||||||
Source of the work are being offered to the general public at no
|
|
||||||
charge under subsection 6d.
|
|
||||||
|
|
||||||
A separable portion of the object code, whose source code is excluded
|
|
||||||
from the Corresponding Source as a System Library, need not be
|
|
||||||
included in conveying the object code work.
|
|
||||||
|
|
||||||
A "User Product" is either (1) a "consumer product", which means any
|
|
||||||
tangible personal property which is normally used for personal, family,
|
|
||||||
or household purposes, or (2) anything designed or sold for incorporation
|
|
||||||
into a dwelling. In determining whether a product is a consumer product,
|
|
||||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
|
||||||
product received by a particular user, "normally used" refers to a
|
|
||||||
typical or common use of that class of product, regardless of the status
|
|
||||||
of the particular user or of the way in which the particular user
|
|
||||||
actually uses, or expects or is expected to use, the product. A product
|
|
||||||
is a consumer product regardless of whether the product has substantial
|
|
||||||
commercial, industrial or non-consumer uses, unless such uses represent
|
|
||||||
the only significant mode of use of the product.
|
|
||||||
|
|
||||||
"Installation Information" for a User Product means any methods,
|
|
||||||
procedures, authorization keys, or other information required to install
|
|
||||||
and execute modified versions of a covered work in that User Product from
|
|
||||||
a modified version of its Corresponding Source. The information must
|
|
||||||
suffice to ensure that the continued functioning of the modified object
|
|
||||||
code is in no case prevented or interfered with solely because
|
|
||||||
modification has been made.
|
|
||||||
|
|
||||||
If you convey an object code work under this section in, or with, or
|
|
||||||
specifically for use in, a User Product, and the conveying occurs as
|
|
||||||
part of a transaction in which the right of possession and use of the
|
|
||||||
User Product is transferred to the recipient in perpetuity or for a
|
|
||||||
fixed term (regardless of how the transaction is characterized), the
|
|
||||||
Corresponding Source conveyed under this section must be accompanied
|
|
||||||
by the Installation Information. But this requirement does not apply
|
|
||||||
if neither you nor any third party retains the ability to install
|
|
||||||
modified object code on the User Product (for example, the work has
|
|
||||||
been installed in ROM).
|
|
||||||
|
|
||||||
The requirement to provide Installation Information does not include a
|
|
||||||
requirement to continue to provide support service, warranty, or updates
|
|
||||||
for a work that has been modified or installed by the recipient, or for
|
|
||||||
the User Product in which it has been modified or installed. Access to a
|
|
||||||
network may be denied when the modification itself materially and
|
|
||||||
adversely affects the operation of the network or violates the rules and
|
|
||||||
protocols for communication across the network.
|
|
||||||
|
|
||||||
Corresponding Source conveyed, and Installation Information provided,
|
|
||||||
in accord with this section must be in a format that is publicly
|
|
||||||
documented (and with an implementation available to the public in
|
|
||||||
source code form), and must require no special password or key for
|
|
||||||
unpacking, reading or copying.
|
|
||||||
|
|
||||||
7. Additional Terms.
|
|
||||||
|
|
||||||
"Additional permissions" are terms that supplement the terms of this
|
|
||||||
License by making exceptions from one or more of its conditions.
|
|
||||||
Additional permissions that are applicable to the entire Program shall
|
|
||||||
be treated as though they were included in this License, to the extent
|
|
||||||
that they are valid under applicable law. If additional permissions
|
|
||||||
apply only to part of the Program, that part may be used separately
|
|
||||||
under those permissions, but the entire Program remains governed by
|
|
||||||
this License without regard to the additional permissions.
|
|
||||||
|
|
||||||
When you convey a copy of a covered work, you may at your option
|
|
||||||
remove any additional permissions from that copy, or from any part of
|
|
||||||
it. (Additional permissions may be written to require their own
|
|
||||||
removal in certain cases when you modify the work.) You may place
|
|
||||||
additional permissions on material, added by you to a covered work,
|
|
||||||
for which you have or can give appropriate copyright permission.
|
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, for material you
|
|
||||||
add to a covered work, you may (if authorized by the copyright holders of
|
|
||||||
that material) supplement the terms of this License with terms:
|
|
||||||
|
|
||||||
a) Disclaiming warranty or limiting liability differently from the
|
|
||||||
terms of sections 15 and 16 of this License; or
|
|
||||||
|
|
||||||
b) Requiring preservation of specified reasonable legal notices or
|
|
||||||
author attributions in that material or in the Appropriate Legal
|
|
||||||
Notices displayed by works containing it; or
|
|
||||||
|
|
||||||
c) Prohibiting misrepresentation of the origin of that material, or
|
|
||||||
requiring that modified versions of such material be marked in
|
|
||||||
reasonable ways as different from the original version; or
|
|
||||||
|
|
||||||
d) Limiting the use for publicity purposes of names of licensors or
|
|
||||||
authors of the material; or
|
|
||||||
|
|
||||||
e) Declining to grant rights under trademark law for use of some
|
|
||||||
trade names, trademarks, or service marks; or
|
|
||||||
|
|
||||||
f) Requiring indemnification of licensors and authors of that
|
|
||||||
material by anyone who conveys the material (or modified versions of
|
|
||||||
it) with contractual assumptions of liability to the recipient, for
|
|
||||||
any liability that these contractual assumptions directly impose on
|
|
||||||
those licensors and authors.
|
|
||||||
|
|
||||||
All other non-permissive additional terms are considered "further
|
|
||||||
restrictions" within the meaning of section 10. If the Program as you
|
|
||||||
received it, or any part of it, contains a notice stating that it is
|
|
||||||
governed by this License along with a term that is a further
|
|
||||||
restriction, you may remove that term. If a license document contains
|
|
||||||
a further restriction but permits relicensing or conveying under this
|
|
||||||
License, you may add to a covered work material governed by the terms
|
|
||||||
of that license document, provided that the further restriction does
|
|
||||||
not survive such relicensing or conveying.
|
|
||||||
|
|
||||||
If you add terms to a covered work in accord with this section, you
|
|
||||||
must place, in the relevant source files, a statement of the
|
|
||||||
additional terms that apply to those files, or a notice indicating
|
|
||||||
where to find the applicable terms.
|
|
||||||
|
|
||||||
Additional terms, permissive or non-permissive, may be stated in the
|
|
||||||
form of a separately written license, or stated as exceptions;
|
|
||||||
the above requirements apply either way.
|
|
||||||
|
|
||||||
8. Termination.
|
|
||||||
|
|
||||||
You may not propagate or modify a covered work except as expressly
|
|
||||||
provided under this License. Any attempt otherwise to propagate or
|
|
||||||
modify it is void, and will automatically terminate your rights under
|
|
||||||
this License (including any patent licenses granted under the third
|
|
||||||
paragraph of section 11).
|
|
||||||
|
|
||||||
However, if you cease all violation of this License, then your
|
|
||||||
license from a particular copyright holder is reinstated (a)
|
|
||||||
provisionally, unless and until the copyright holder explicitly and
|
|
||||||
finally terminates your license, and (b) permanently, if the copyright
|
|
||||||
holder fails to notify you of the violation by some reasonable means
|
|
||||||
prior to 60 days after the cessation.
|
|
||||||
|
|
||||||
Moreover, your license from a particular copyright holder is
|
|
||||||
reinstated permanently if the copyright holder notifies you of the
|
|
||||||
violation by some reasonable means, this is the first time you have
|
|
||||||
received notice of violation of this License (for any work) from that
|
|
||||||
copyright holder, and you cure the violation prior to 30 days after
|
|
||||||
your receipt of the notice.
|
|
||||||
|
|
||||||
Termination of your rights under this section does not terminate the
|
|
||||||
licenses of parties who have received copies or rights from you under
|
|
||||||
this License. If your rights have been terminated and not permanently
|
|
||||||
reinstated, you do not qualify to receive new licenses for the same
|
|
||||||
material under section 10.
|
|
||||||
|
|
||||||
9. Acceptance Not Required for Having Copies.
|
|
||||||
|
|
||||||
You are not required to accept this License in order to receive or
|
|
||||||
run a copy of the Program. Ancillary propagation of a covered work
|
|
||||||
occurring solely as a consequence of using peer-to-peer transmission
|
|
||||||
to receive a copy likewise does not require acceptance. However,
|
|
||||||
nothing other than this License grants you permission to propagate or
|
|
||||||
modify any covered work. These actions infringe copyright if you do
|
|
||||||
not accept this License. Therefore, by modifying or propagating a
|
|
||||||
covered work, you indicate your acceptance of this License to do so.
|
|
||||||
|
|
||||||
10. Automatic Licensing of Downstream Recipients.
|
|
||||||
|
|
||||||
Each time you convey a covered work, the recipient automatically
|
|
||||||
receives a license from the original licensors, to run, modify and
|
|
||||||
propagate that work, subject to this License. You are not responsible
|
|
||||||
for enforcing compliance by third parties with this License.
|
|
||||||
|
|
||||||
An "entity transaction" is a transaction transferring control of an
|
|
||||||
organization, or substantially all assets of one, or subdividing an
|
|
||||||
organization, or merging organizations. If propagation of a covered
|
|
||||||
work results from an entity transaction, each party to that
|
|
||||||
transaction who receives a copy of the work also receives whatever
|
|
||||||
licenses to the work the party's predecessor in interest had or could
|
|
||||||
give under the previous paragraph, plus a right to possession of the
|
|
||||||
Corresponding Source of the work from the predecessor in interest, if
|
|
||||||
the predecessor has it or can get it with reasonable efforts.
|
|
||||||
|
|
||||||
You may not impose any further restrictions on the exercise of the
|
|
||||||
rights granted or affirmed under this License. For example, you may
|
|
||||||
not impose a license fee, royalty, or other charge for exercise of
|
|
||||||
rights granted under this License, and you may not initiate litigation
|
|
||||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
|
||||||
any patent claim is infringed by making, using, selling, offering for
|
|
||||||
sale, or importing the Program or any portion of it.
|
|
||||||
|
|
||||||
11. Patents.
|
|
||||||
|
|
||||||
A "contributor" is a copyright holder who authorizes use under this
|
|
||||||
License of the Program or a work on which the Program is based. The
|
|
||||||
work thus licensed is called the contributor's "contributor version".
|
|
||||||
|
|
||||||
A contributor's "essential patent claims" are all patent claims
|
|
||||||
owned or controlled by the contributor, whether already acquired or
|
|
||||||
hereafter acquired, that would be infringed by some manner, permitted
|
|
||||||
by this License, of making, using, or selling its contributor version,
|
|
||||||
but do not include claims that would be infringed only as a
|
|
||||||
consequence of further modification of the contributor version. For
|
|
||||||
purposes of this definition, "control" includes the right to grant
|
|
||||||
patent sublicenses in a manner consistent with the requirements of
|
|
||||||
this License.
|
|
||||||
|
|
||||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
|
||||||
patent license under the contributor's essential patent claims, to
|
|
||||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
|
||||||
propagate the contents of its contributor version.
|
|
||||||
|
|
||||||
In the following three paragraphs, a "patent license" is any express
|
|
||||||
agreement or commitment, however denominated, not to enforce a patent
|
|
||||||
(such as an express permission to practice a patent or covenant not to
|
|
||||||
sue for patent infringement). To "grant" such a patent license to a
|
|
||||||
party means to make such an agreement or commitment not to enforce a
|
|
||||||
patent against the party.
|
|
||||||
|
|
||||||
If you convey a covered work, knowingly relying on a patent license,
|
|
||||||
and the Corresponding Source of the work is not available for anyone
|
|
||||||
to copy, free of charge and under the terms of this License, through a
|
|
||||||
publicly available network server or other readily accessible means,
|
|
||||||
then you must either (1) cause the Corresponding Source to be so
|
|
||||||
available, or (2) arrange to deprive yourself of the benefit of the
|
|
||||||
patent license for this particular work, or (3) arrange, in a manner
|
|
||||||
consistent with the requirements of this License, to extend the patent
|
|
||||||
license to downstream recipients. "Knowingly relying" means you have
|
|
||||||
actual knowledge that, but for the patent license, your conveying the
|
|
||||||
covered work in a country, or your recipient's use of the covered work
|
|
||||||
in a country, would infringe one or more identifiable patents in that
|
|
||||||
country that you have reason to believe are valid.
|
|
||||||
|
|
||||||
If, pursuant to or in connection with a single transaction or
|
|
||||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
|
||||||
covered work, and grant a patent license to some of the parties
|
|
||||||
receiving the covered work authorizing them to use, propagate, modify
|
|
||||||
or convey a specific copy of the covered work, then the patent license
|
|
||||||
you grant is automatically extended to all recipients of the covered
|
|
||||||
work and works based on it.
|
|
||||||
|
|
||||||
A patent license is "discriminatory" if it does not include within
|
|
||||||
the scope of its coverage, prohibits the exercise of, or is
|
|
||||||
conditioned on the non-exercise of one or more of the rights that are
|
|
||||||
specifically granted under this License. You may not convey a covered
|
|
||||||
work if you are a party to an arrangement with a third party that is
|
|
||||||
in the business of distributing software, under which you make payment
|
|
||||||
to the third party based on the extent of your activity of conveying
|
|
||||||
the work, and under which the third party grants, to any of the
|
|
||||||
parties who would receive the covered work from you, a discriminatory
|
|
||||||
patent license (a) in connection with copies of the covered work
|
|
||||||
conveyed by you (or copies made from those copies), or (b) primarily
|
|
||||||
for and in connection with specific products or compilations that
|
|
||||||
contain the covered work, unless you entered into that arrangement,
|
|
||||||
or that patent license was granted, prior to 28 March 2007.
|
|
||||||
|
|
||||||
Nothing in this License shall be construed as excluding or limiting
|
|
||||||
any implied license or other defenses to infringement that may
|
|
||||||
otherwise be available to you under applicable patent law.
|
|
||||||
|
|
||||||
12. No Surrender of Others' Freedom.
|
|
||||||
|
|
||||||
If conditions are imposed on you (whether by court order, agreement or
|
|
||||||
otherwise) that contradict the conditions of this License, they do not
|
|
||||||
excuse you from the conditions of this License. If you cannot convey a
|
|
||||||
covered work so as to satisfy simultaneously your obligations under this
|
|
||||||
License and any other pertinent obligations, then as a consequence you may
|
|
||||||
not convey it at all. For example, if you agree to terms that obligate you
|
|
||||||
to collect a royalty for further conveying from those to whom you convey
|
|
||||||
the Program, the only way you could satisfy both those terms and this
|
|
||||||
License would be to refrain entirely from conveying the Program.
|
|
||||||
|
|
||||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, if you modify the
|
|
||||||
Program, your modified version must prominently offer all users
|
|
||||||
interacting with it remotely through a computer network (if your version
|
|
||||||
supports such interaction) an opportunity to receive the Corresponding
|
|
||||||
Source of your version by providing access to the Corresponding Source
|
|
||||||
from a network server at no charge, through some standard or customary
|
|
||||||
means of facilitating copying of software. This Corresponding Source
|
|
||||||
shall include the Corresponding Source for any work covered by version 3
|
|
||||||
of the GNU General Public License that is incorporated pursuant to the
|
|
||||||
following paragraph.
|
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, you have
|
|
||||||
permission to link or combine any covered work with a work licensed
|
|
||||||
under version 3 of the GNU General Public License into a single
|
|
||||||
combined work, and to convey the resulting work. The terms of this
|
|
||||||
License will continue to apply to the part which is the covered work,
|
|
||||||
but the work with which it is combined will remain governed by version
|
|
||||||
3 of the GNU General Public License.
|
|
||||||
|
|
||||||
14. Revised Versions of this License.
|
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions of
|
|
||||||
the GNU Affero General Public License from time to time. Such new versions
|
|
||||||
will be similar in spirit to the present version, but may differ in detail to
|
|
||||||
address new problems or concerns.
|
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
|
||||||
Program specifies that a certain numbered version of the GNU Affero General
|
|
||||||
Public License "or any later version" applies to it, you have the
|
|
||||||
option of following the terms and conditions either of that numbered
|
|
||||||
version or of any later version published by the Free Software
|
|
||||||
Foundation. If the Program does not specify a version number of the
|
|
||||||
GNU Affero General Public License, you may choose any version ever published
|
|
||||||
by the Free Software Foundation.
|
|
||||||
|
|
||||||
If the Program specifies that a proxy can decide which future
|
|
||||||
versions of the GNU Affero General Public License can be used, that proxy's
|
|
||||||
public statement of acceptance of a version permanently authorizes you
|
|
||||||
to choose that version for the Program.
|
|
||||||
|
|
||||||
Later license versions may give you additional or different
|
|
||||||
permissions. However, no additional obligations are imposed on any
|
|
||||||
author or copyright holder as a result of your choosing to follow a
|
|
||||||
later version.
|
|
||||||
|
|
||||||
15. Disclaimer of Warranty.
|
|
||||||
|
|
||||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
|
||||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
|
||||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
|
||||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
|
||||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
|
||||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
|
||||||
|
|
||||||
16. Limitation of Liability.
|
|
||||||
|
|
||||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
|
||||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
|
||||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
|
||||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
|
||||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
|
||||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
|
||||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
|
||||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
|
||||||
SUCH DAMAGES.
|
|
||||||
|
|
||||||
17. Interpretation of Sections 15 and 16.
|
|
||||||
|
|
||||||
If the disclaimer of warranty and limitation of liability provided
|
|
||||||
above cannot be given local legal effect according to their terms,
|
|
||||||
reviewing courts shall apply local law that most closely approximates
|
|
||||||
an absolute waiver of all civil liability in connection with the
|
|
||||||
Program, unless a warranty or assumption of liability accompanies a
|
|
||||||
copy of the Program in return for a fee.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
How to Apply These Terms to Your New Programs
|
|
||||||
|
|
||||||
If you develop a new program, and you want it to be of the greatest
|
|
||||||
possible use to the public, the best way to achieve this is to make it
|
|
||||||
free software which everyone can redistribute and change under these terms.
|
|
||||||
|
|
||||||
To do so, attach the following notices to the program. It is safest
|
|
||||||
to attach them to the start of each source file to most effectively
|
|
||||||
state the exclusion of warranty; and each file should have at least
|
|
||||||
the "copyright" line and a pointer to where the full notice is found.
|
|
||||||
|
|
||||||
<one line to give the program's name and a brief idea of what it does.>
|
|
||||||
Copyright (C) <year> <name of author>
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
Also add information on how to contact you by electronic and paper mail.
|
|
||||||
|
|
||||||
If your software can interact with users remotely through a computer
|
|
||||||
network, you should also make sure that it provides a way for users to
|
|
||||||
get its source. For example, if your program is a web application, its
|
|
||||||
interface could display a "Source" link that leads users to an archive
|
|
||||||
of the code. There are many ways you could offer source, and different
|
|
||||||
solutions will be better for different programs; see section 13 for the
|
|
||||||
specific requirements.
|
|
||||||
|
|
||||||
You should also get your employer (if you work as a programmer) or school,
|
|
||||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
|
||||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
|
||||||
<http://www.gnu.org/licenses/>.
|
|
|
@ -1,45 +0,0 @@
|
||||||
searx
|
|
||||||
=====
|
|
||||||
|
|
||||||
A privacy-respecting, hackable `metasearch
|
|
||||||
engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
|
|
||||||
|
|
||||||
List of `running
|
|
||||||
instances <https://github.com/asciimoo/searx/wiki/Searx-instances>`__.
|
|
||||||
|
|
||||||
See the `documentation <https://asciimoo.github.io/searx>`__ and the `wiki <https://github.com/asciimoo/searx/wiki>`__ for more information.
|
|
||||||
|
|
||||||
|Flattr searx|
|
|
||||||
|
|
||||||
Installation
|
|
||||||
~~~~~~~~~~~~
|
|
||||||
|
|
||||||
- clone source:
|
|
||||||
``git clone git@github.com:asciimoo/searx.git && cd searx``
|
|
||||||
- install dependencies: ``./manage.sh update_packages``
|
|
||||||
- edit your
|
|
||||||
`settings.yml <https://github.com/asciimoo/searx/blob/master/searx/settings.yml>`__
|
|
||||||
(set your ``secret_key``!)
|
|
||||||
- run ``python searx/webapp.py`` to start the application
|
|
||||||
|
|
||||||
For all the details, follow this `step by step
|
|
||||||
installation <https://github.com/asciimoo/searx/wiki/Installation>`__
|
|
||||||
|
|
||||||
Bugs
|
|
||||||
~~~~
|
|
||||||
|
|
||||||
Bugs or suggestions? Visit the `issue
|
|
||||||
tracker <https://github.com/asciimoo/searx/issues>`__.
|
|
||||||
|
|
||||||
`License <https://github.com/asciimoo/searx/blob/master/LICENSE>`__
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
More about searx
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
- `ohloh <https://www.ohloh.net/p/searx/>`__
|
|
||||||
- `twitter <https://twitter.com/Searx_engine>`__
|
|
||||||
- IRC: #searx @ freenode
|
|
||||||
|
|
||||||
.. |Flattr searx| image:: http://api.flattr.com/button/flattr-badge-large.png
|
|
||||||
:target: https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software
|
|
|
@ -1,3 +0,0 @@
|
||||||
[python: **.py]
|
|
||||||
[jinja2: **/templates/**.html]
|
|
||||||
extensions=jinja2.ext.autoescape,jinja2.ext.with_
|
|
|
@ -1,25 +0,0 @@
|
||||||
|
|
||||||
categories = ['general'] # optional
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
'''pre-request callback
|
|
||||||
params<dict>:
|
|
||||||
method : POST/GET
|
|
||||||
headers : {}
|
|
||||||
data : {} # if method == POST
|
|
||||||
url : ''
|
|
||||||
category: 'search category'
|
|
||||||
pageno : 1 # number of the requested page
|
|
||||||
'''
|
|
||||||
|
|
||||||
params['url'] = 'https://host/%s' % query
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
'''post-response callback
|
|
||||||
resp: requests response object
|
|
||||||
'''
|
|
||||||
return [{'url': '', 'title': '', 'content': ''}]
|
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
BASE_DIR=$(dirname `readlink -f $0`)
|
|
||||||
PYTHONPATH=$BASE_DIR
|
|
||||||
SEARX_DIR="$BASE_DIR/searx"
|
|
||||||
ACTION=$1
|
|
||||||
|
|
||||||
update_packages() {
|
|
||||||
pip install --upgrade -r "$BASE_DIR/requirements.txt"
|
|
||||||
}
|
|
||||||
|
|
||||||
update_dev_packages() {
|
|
||||||
update_packages
|
|
||||||
pip install --upgrade -r "$BASE_DIR/requirements-dev.txt"
|
|
||||||
}
|
|
||||||
|
|
||||||
pep8_check() {
|
|
||||||
echo '[!] Running pep8 check'
|
|
||||||
# ignored rules:
|
|
||||||
# E402 module level import not at top of file
|
|
||||||
# W503 line break before binary operator
|
|
||||||
pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
|
|
||||||
}
|
|
||||||
|
|
||||||
unit_tests() {
|
|
||||||
echo '[!] Running unit tests'
|
|
||||||
python -m nose2 -s "$BASE_DIR/tests/unit"
|
|
||||||
}
|
|
||||||
|
|
||||||
py_test_coverage() {
|
|
||||||
echo '[!] Running python test coverage'
|
|
||||||
PYTHONPATH=`pwd` python -m nose2 -C --coverage "$SEARX_DIR" -s "$BASE_DIR/tests/unit"
|
|
||||||
coverage report
|
|
||||||
coverage html
|
|
||||||
}
|
|
||||||
|
|
||||||
robot_tests() {
|
|
||||||
echo '[!] Running robot tests'
|
|
||||||
PYTHONPATH=`pwd` python "$SEARX_DIR/testing.py" robot
|
|
||||||
}
|
|
||||||
|
|
||||||
tests() {
|
|
||||||
set -e
|
|
||||||
pep8_check
|
|
||||||
unit_tests
|
|
||||||
robot_tests
|
|
||||||
set +e
|
|
||||||
}
|
|
||||||
|
|
||||||
build_style() {
|
|
||||||
lessc -x "$BASE_DIR/searx/static/$1" "$BASE_DIR/searx/static/$2"
|
|
||||||
}
|
|
||||||
|
|
||||||
styles() {
|
|
||||||
echo '[!] Building styles'
|
|
||||||
build_style themes/default/less/style.less themes/default/css/style.css
|
|
||||||
build_style themes/default/less/style-rtl.less themes/default/css/style-rtl.css
|
|
||||||
build_style themes/courgette/less/style.less themes/courgette/css/style.css
|
|
||||||
build_style themes/courgette/less/style-rtl.less themes/courgette/css/style-rtl.css
|
|
||||||
build_style less/bootstrap/bootstrap.less css/bootstrap.min.css
|
|
||||||
build_style themes/oscar/less/oscar/oscar.less themes/oscar/css/oscar.min.css
|
|
||||||
build_style themes/pix-art/less/style.less themes/pix-art/css/style.css
|
|
||||||
}
|
|
||||||
|
|
||||||
grunt_build() {
|
|
||||||
grunt --gruntfile "$SEARX_DIR/static/themes/oscar/gruntfile.js"
|
|
||||||
}
|
|
||||||
|
|
||||||
locales() {
|
|
||||||
pybabel compile -d "$SEARX_DIR/translations"
|
|
||||||
}
|
|
||||||
|
|
||||||
help() {
|
|
||||||
[ -z "$1" ] || printf "Error: $1\n"
|
|
||||||
echo "Searx manage.sh help
|
|
||||||
|
|
||||||
Commands
|
|
||||||
========
|
|
||||||
grunt_build - Build js files
|
|
||||||
help - This text
|
|
||||||
locales - Compile locales
|
|
||||||
pep8_check - Pep8 validation
|
|
||||||
py_test_coverage - Unit test coverage
|
|
||||||
robot_tests - Run selenium tests
|
|
||||||
styles - Build less files
|
|
||||||
tests - Run all python tests (pep8, unit, robot)
|
|
||||||
unit_tests - Run unit tests
|
|
||||||
update_dev_packages - Check & update development and production dependency changes
|
|
||||||
update_packages - Check & update dependency changes
|
|
||||||
"
|
|
||||||
}
|
|
||||||
|
|
||||||
[ "$(command -V "$ACTION" | grep ' function$')" = "" ] \
|
|
||||||
&& help "action not found" \
|
|
||||||
|| $ACTION
|
|
|
@ -1,10 +0,0 @@
|
||||||
babel==2.2.0
|
|
||||||
mock==1.0.1
|
|
||||||
nose2[coverage-plugin]
|
|
||||||
pep8==1.7.0
|
|
||||||
plone.testing==4.0.15
|
|
||||||
robotframework-selenium2library==1.7.4
|
|
||||||
robotsuite==1.7.0
|
|
||||||
transifex-client==0.11
|
|
||||||
unittest2==1.1.0
|
|
||||||
zope.testrunner==4.4.10
|
|
|
@ -1,12 +0,0 @@
|
||||||
certifi==2015.11.20.1
|
|
||||||
flask==0.10.1
|
|
||||||
flask-babel==0.9
|
|
||||||
lxml==3.5.0
|
|
||||||
ndg-httpsclient==0.4.0
|
|
||||||
pyasn1==0.1.9
|
|
||||||
pyasn1-modules==0.0.8
|
|
||||||
pygments==2.0.2
|
|
||||||
pyopenssl==0.15.1
|
|
||||||
python-dateutil==2.4.2
|
|
||||||
pyyaml==3.11
|
|
||||||
requests==2.9.1
|
|
|
@ -1,59 +0,0 @@
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
|
|
||||||
import certifi
|
|
||||||
import logging
|
|
||||||
from os import environ
|
|
||||||
from os.path import realpath, dirname, join, abspath
|
|
||||||
from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
|
|
||||||
try:
|
|
||||||
from yaml import load
|
|
||||||
except:
|
|
||||||
from sys import exit, stderr
|
|
||||||
stderr.write('[E] install pyyaml\n')
|
|
||||||
exit(2)
|
|
||||||
|
|
||||||
searx_dir = abspath(dirname(__file__))
|
|
||||||
engine_dir = dirname(realpath(__file__))
|
|
||||||
|
|
||||||
# if possible set path to settings using the
|
|
||||||
# enviroment variable SEARX_SETTINGS_PATH
|
|
||||||
if 'SEARX_SETTINGS_PATH' in environ:
|
|
||||||
settings_path = environ['SEARX_SETTINGS_PATH']
|
|
||||||
# otherwise using default path
|
|
||||||
else:
|
|
||||||
settings_path = join(searx_dir, 'settings.yml')
|
|
||||||
|
|
||||||
# load settings
|
|
||||||
with open(settings_path) as settings_yaml:
|
|
||||||
settings = load(settings_yaml)
|
|
||||||
|
|
||||||
if settings.get('general', {}).get('debug'):
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
else:
|
|
||||||
logging.basicConfig(level=logging.WARNING)
|
|
||||||
|
|
||||||
logger = logging.getLogger('searx')
|
|
||||||
|
|
||||||
# Workaround for openssl versions <1.0.2
|
|
||||||
# https://github.com/certifi/python-certifi/issues/26
|
|
||||||
if OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
|
|
||||||
if hasattr(certifi, 'old_where'):
|
|
||||||
environ['REQUESTS_CA_BUNDLE'] = certifi.old_where()
|
|
||||||
logger.warning('You are using an old openssl version({0}), please upgrade above 1.0.2!'.format(OPENSSL_VERSION))
|
|
||||||
|
|
||||||
logger.info('Initialisation done')
|
|
|
@ -1,197 +0,0 @@
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
from searx import settings
|
|
||||||
from searx.languages import language_codes
|
|
||||||
from searx.engines import (
|
|
||||||
categories, engines, engine_shortcuts
|
|
||||||
)
|
|
||||||
from searx.poolrequests import get as http_get
|
|
||||||
|
|
||||||
|
|
||||||
def get(*args, **kwargs):
|
|
||||||
if 'timeout' not in kwargs:
|
|
||||||
kwargs['timeout'] = settings['outgoing']['request_timeout']
|
|
||||||
|
|
||||||
return http_get(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def searx_bang(full_query):
|
|
||||||
'''check if the searchQuery contain a bang, and create fitting autocompleter results'''
|
|
||||||
# check if there is a query which can be parsed
|
|
||||||
if len(full_query.getSearchQuery()) == 0:
|
|
||||||
return []
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# check if current query stats with !bang
|
|
||||||
first_char = full_query.getSearchQuery()[0]
|
|
||||||
if first_char == '!' or first_char == '?':
|
|
||||||
if len(full_query.getSearchQuery()) == 1:
|
|
||||||
# show some example queries
|
|
||||||
# TODO, check if engine is not avaliable
|
|
||||||
results.append(first_char + "images")
|
|
||||||
results.append(first_char + "wikipedia")
|
|
||||||
results.append(first_char + "osm")
|
|
||||||
else:
|
|
||||||
engine_query = full_query.getSearchQuery()[1:]
|
|
||||||
|
|
||||||
# check if query starts with categorie name
|
|
||||||
for categorie in categories:
|
|
||||||
if categorie.startswith(engine_query):
|
|
||||||
results.append(first_char + '{categorie}'.format(categorie=categorie))
|
|
||||||
|
|
||||||
# check if query starts with engine name
|
|
||||||
for engine in engines:
|
|
||||||
if engine.startswith(engine_query.replace('_', ' ')):
|
|
||||||
results.append(first_char + '{engine}'.format(engine=engine.replace(' ', '_')))
|
|
||||||
|
|
||||||
# check if query starts with engine shortcut
|
|
||||||
for engine_shortcut in engine_shortcuts:
|
|
||||||
if engine_shortcut.startswith(engine_query):
|
|
||||||
results.append(first_char + '{engine_shortcut}'.format(engine_shortcut=engine_shortcut))
|
|
||||||
|
|
||||||
# check if current query stats with :bang
|
|
||||||
elif first_char == ':':
|
|
||||||
if len(full_query.getSearchQuery()) == 1:
|
|
||||||
# show some example queries
|
|
||||||
results.append(":en")
|
|
||||||
results.append(":en_us")
|
|
||||||
results.append(":english")
|
|
||||||
results.append(":united_kingdom")
|
|
||||||
else:
|
|
||||||
engine_query = full_query.getSearchQuery()[1:]
|
|
||||||
|
|
||||||
for lc in language_codes:
|
|
||||||
lang_id, lang_name, country = map(str.lower, lc)
|
|
||||||
|
|
||||||
# check if query starts with language-id
|
|
||||||
if lang_id.startswith(engine_query):
|
|
||||||
if len(engine_query) <= 2:
|
|
||||||
results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
|
|
||||||
else:
|
|
||||||
results.append(':{lang_id}'.format(lang_id=lang_id))
|
|
||||||
|
|
||||||
# check if query starts with language name
|
|
||||||
if lang_name.startswith(engine_query):
|
|
||||||
results.append(':{lang_name}'.format(lang_name=lang_name))
|
|
||||||
|
|
||||||
# check if query starts with country
|
|
||||||
if country.startswith(engine_query.replace('_', ' ')):
|
|
||||||
results.append(':{country}'.format(country=country.replace(' ', '_')))
|
|
||||||
|
|
||||||
# remove duplicates
|
|
||||||
result_set = set(results)
|
|
||||||
|
|
||||||
# remove results which are already contained in the query
|
|
||||||
for query_part in full_query.query_parts:
|
|
||||||
if query_part in result_set:
|
|
||||||
result_set.remove(query_part)
|
|
||||||
|
|
||||||
# convert result_set back to list
|
|
||||||
return list(result_set)
|
|
||||||
|
|
||||||
|
|
||||||
def dbpedia(query, lang):
|
|
||||||
# dbpedia autocompleter, no HTTPS
|
|
||||||
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
|
|
||||||
|
|
||||||
response = get(autocomplete_url + urlencode(dict(QueryString=query)))
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
if response.ok:
|
|
||||||
dom = etree.fromstring(response.content)
|
|
||||||
results = dom.xpath('//a:Result/a:Label//text()',
|
|
||||||
namespaces={'a': 'http://lookup.dbpedia.org/'})
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def duckduckgo(query, lang):
|
|
||||||
# duckduckgo autocompleter
|
|
||||||
url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
|
|
||||||
|
|
||||||
resp = loads(get(url.format(urlencode(dict(q=query)))).text)
|
|
||||||
if len(resp) > 1:
|
|
||||||
return resp[1]
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def google(query, lang):
|
|
||||||
# google autocompleter
|
|
||||||
autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
|
|
||||||
|
|
||||||
response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
if response.ok:
|
|
||||||
dom = etree.fromstring(response.text)
|
|
||||||
results = dom.xpath('//suggestion/@data')
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def startpage(query, lang):
|
|
||||||
# startpage autocompleter
|
|
||||||
url = 'https://startpage.com/do/suggest?{query}'
|
|
||||||
|
|
||||||
resp = get(url.format(query=urlencode({'query': query}))).text.split('\n')
|
|
||||||
if len(resp) > 1:
|
|
||||||
return resp
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def qwant(query, lang):
|
|
||||||
# qwant autocompleter (additional parameter : lang=en_en&count=xxx )
|
|
||||||
url = 'https://api.qwant.com/api/suggest?{query}'
|
|
||||||
|
|
||||||
resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
if resp.ok:
|
|
||||||
data = loads(resp.text)
|
|
||||||
if data['status'] == 'success':
|
|
||||||
for item in data['data']['items']:
|
|
||||||
results.append(item['value'])
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def wikipedia(query, lang):
|
|
||||||
# wikipedia autocompleter
|
|
||||||
url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'
|
|
||||||
|
|
||||||
resp = loads(get(url.format(urlencode(dict(search=query)))).text)
|
|
||||||
if len(resp) > 1:
|
|
||||||
return resp[1]
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
backends = {'dbpedia': dbpedia,
|
|
||||||
'duckduckgo': duckduckgo,
|
|
||||||
'google': google,
|
|
||||||
'startpage': startpage,
|
|
||||||
'qwant': qwant,
|
|
||||||
'wikipedia': wikipedia
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,202 +0,0 @@
|
||||||
|
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
|
|
||||||
from os.path import realpath, dirname, splitext, join
|
|
||||||
import sys
|
|
||||||
from imp import load_source
|
|
||||||
from flask.ext.babel import gettext
|
|
||||||
from operator import itemgetter
|
|
||||||
from searx import settings
|
|
||||||
from searx import logger
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('engines')
|
|
||||||
|
|
||||||
engine_dir = dirname(realpath(__file__))
|
|
||||||
|
|
||||||
engines = {}
|
|
||||||
|
|
||||||
categories = {'general': []}
|
|
||||||
|
|
||||||
engine_shortcuts = {}
|
|
||||||
engine_default_args = {'paging': False,
|
|
||||||
'categories': ['general'],
|
|
||||||
'language_support': True,
|
|
||||||
'safesearch': False,
|
|
||||||
'timeout': settings['outgoing']['request_timeout'],
|
|
||||||
'shortcut': '-',
|
|
||||||
'disabled': False,
|
|
||||||
'suspend_end_time': 0,
|
|
||||||
'continuous_errors': 0}
|
|
||||||
|
|
||||||
|
|
||||||
def load_module(filename):
|
|
||||||
modname = splitext(filename)[0]
|
|
||||||
if modname in sys.modules:
|
|
||||||
del sys.modules[modname]
|
|
||||||
filepath = join(engine_dir, filename)
|
|
||||||
module = load_source(modname, filepath)
|
|
||||||
module.name = modname
|
|
||||||
return module
|
|
||||||
|
|
||||||
|
|
||||||
def load_engine(engine_data):
|
|
||||||
engine_name = engine_data['engine']
|
|
||||||
engine = load_module(engine_name + '.py')
|
|
||||||
|
|
||||||
for param_name in engine_data:
|
|
||||||
if param_name == 'engine':
|
|
||||||
continue
|
|
||||||
if param_name == 'categories':
|
|
||||||
if engine_data['categories'] == 'none':
|
|
||||||
engine.categories = []
|
|
||||||
else:
|
|
||||||
engine.categories = map(
|
|
||||||
str.strip, engine_data['categories'].split(','))
|
|
||||||
continue
|
|
||||||
setattr(engine, param_name, engine_data[param_name])
|
|
||||||
|
|
||||||
for arg_name, arg_value in engine_default_args.iteritems():
|
|
||||||
if not hasattr(engine, arg_name):
|
|
||||||
setattr(engine, arg_name, arg_value)
|
|
||||||
|
|
||||||
# checking required variables
|
|
||||||
for engine_attr in dir(engine):
|
|
||||||
if engine_attr.startswith('_'):
|
|
||||||
continue
|
|
||||||
if getattr(engine, engine_attr) is None:
|
|
||||||
logger.error('Missing engine config attribute: "{0}.{1}"'
|
|
||||||
.format(engine.name, engine_attr))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
engine.stats = {
|
|
||||||
'result_count': 0,
|
|
||||||
'search_count': 0,
|
|
||||||
'page_load_time': 0,
|
|
||||||
'score_count': 0,
|
|
||||||
'errors': 0
|
|
||||||
}
|
|
||||||
|
|
||||||
for category_name in engine.categories:
|
|
||||||
categories.setdefault(category_name, []).append(engine)
|
|
||||||
|
|
||||||
if engine.shortcut in engine_shortcuts:
|
|
||||||
logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
engine_shortcuts[engine.shortcut] = engine.name
|
|
||||||
|
|
||||||
return engine
|
|
||||||
|
|
||||||
|
|
||||||
def get_engines_stats():
|
|
||||||
# TODO refactor
|
|
||||||
pageloads = []
|
|
||||||
results = []
|
|
||||||
scores = []
|
|
||||||
errors = []
|
|
||||||
scores_per_result = []
|
|
||||||
|
|
||||||
max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
|
|
||||||
for engine in engines.values():
|
|
||||||
if engine.stats['search_count'] == 0:
|
|
||||||
continue
|
|
||||||
results_num = \
|
|
||||||
engine.stats['result_count'] / float(engine.stats['search_count'])
|
|
||||||
load_times = engine.stats['page_load_time'] / float(engine.stats['search_count']) # noqa
|
|
||||||
if results_num:
|
|
||||||
score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
|
|
||||||
score_per_result = score / results_num
|
|
||||||
else:
|
|
||||||
score = score_per_result = 0.0
|
|
||||||
max_results = max(results_num, max_results)
|
|
||||||
max_pageload = max(load_times, max_pageload)
|
|
||||||
max_score = max(score, max_score)
|
|
||||||
max_score_per_result = max(score_per_result, max_score_per_result)
|
|
||||||
max_errors = max(max_errors, engine.stats['errors'])
|
|
||||||
pageloads.append({'avg': load_times, 'name': engine.name})
|
|
||||||
results.append({'avg': results_num, 'name': engine.name})
|
|
||||||
scores.append({'avg': score, 'name': engine.name})
|
|
||||||
errors.append({'avg': engine.stats['errors'], 'name': engine.name})
|
|
||||||
scores_per_result.append({
|
|
||||||
'avg': score_per_result,
|
|
||||||
'name': engine.name
|
|
||||||
})
|
|
||||||
|
|
||||||
for engine in pageloads:
|
|
||||||
if max_pageload:
|
|
||||||
engine['percentage'] = int(engine['avg'] / max_pageload * 100)
|
|
||||||
else:
|
|
||||||
engine['percentage'] = 0
|
|
||||||
|
|
||||||
for engine in results:
|
|
||||||
if max_results:
|
|
||||||
engine['percentage'] = int(engine['avg'] / max_results * 100)
|
|
||||||
else:
|
|
||||||
engine['percentage'] = 0
|
|
||||||
|
|
||||||
for engine in scores:
|
|
||||||
if max_score:
|
|
||||||
engine['percentage'] = int(engine['avg'] / max_score * 100)
|
|
||||||
else:
|
|
||||||
engine['percentage'] = 0
|
|
||||||
|
|
||||||
for engine in scores_per_result:
|
|
||||||
if max_score_per_result:
|
|
||||||
engine['percentage'] = int(engine['avg']
|
|
||||||
/ max_score_per_result * 100)
|
|
||||||
else:
|
|
||||||
engine['percentage'] = 0
|
|
||||||
|
|
||||||
for engine in errors:
|
|
||||||
if max_errors:
|
|
||||||
engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
|
|
||||||
else:
|
|
||||||
engine['percentage'] = 0
|
|
||||||
|
|
||||||
return [
|
|
||||||
(
|
|
||||||
gettext('Page loads (sec)'),
|
|
||||||
sorted(pageloads, key=itemgetter('avg'))
|
|
||||||
),
|
|
||||||
(
|
|
||||||
gettext('Number of results'),
|
|
||||||
sorted(results, key=itemgetter('avg'), reverse=True)
|
|
||||||
),
|
|
||||||
(
|
|
||||||
gettext('Scores'),
|
|
||||||
sorted(scores, key=itemgetter('avg'), reverse=True)
|
|
||||||
),
|
|
||||||
(
|
|
||||||
gettext('Scores per result'),
|
|
||||||
sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
|
|
||||||
),
|
|
||||||
(
|
|
||||||
gettext('Errors'),
|
|
||||||
sorted(errors, key=itemgetter('avg'), reverse=True)
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
if 'engines' not in settings or not settings['engines']:
|
|
||||||
logger.error('No engines found. Edit your settings.yml')
|
|
||||||
exit(2)
|
|
||||||
|
|
||||||
for engine_data in settings['engines']:
|
|
||||||
engine = load_engine(engine_data)
|
|
||||||
engines[engine.name] = engine
|
|
|
@ -1,141 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
Arch Linux Wiki
|
|
||||||
|
|
||||||
@website https://wiki.archlinux.org
|
|
||||||
@provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it']
|
|
||||||
language_support = True
|
|
||||||
paging = True
|
|
||||||
base_url = 'https://wiki.archlinux.org'
|
|
||||||
|
|
||||||
# xpath queries
|
|
||||||
xpath_results = '//ul[@class="mw-search-results"]/li'
|
|
||||||
xpath_link = './/div[@class="mw-search-result-heading"]/a'
|
|
||||||
|
|
||||||
|
|
||||||
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
|
|
||||||
def locale_to_lang_code(locale):
|
|
||||||
if locale.find('_') >= 0:
|
|
||||||
locale = locale.split('_')[0]
|
|
||||||
return locale
|
|
||||||
|
|
||||||
# wikis for some languages were moved off from the main site, we need to make
|
|
||||||
# requests to correct URLs to be able to get results in those languages
|
|
||||||
lang_urls = {
|
|
||||||
'all': {
|
|
||||||
'base': 'https://wiki.archlinux.org',
|
|
||||||
'search': '/index.php?title=Special:Search&offset={offset}&{query}'
|
|
||||||
},
|
|
||||||
'de': {
|
|
||||||
'base': 'https://wiki.archlinux.de',
|
|
||||||
'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}'
|
|
||||||
},
|
|
||||||
'fr': {
|
|
||||||
'base': 'https://wiki.archlinux.fr',
|
|
||||||
'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}'
|
|
||||||
},
|
|
||||||
'ja': {
|
|
||||||
'base': 'https://wiki.archlinuxjp.org',
|
|
||||||
'search': '/index.php?title=特別:検索&offset={offset}&{query}'
|
|
||||||
},
|
|
||||||
'ro': {
|
|
||||||
'base': 'http://wiki.archlinux.ro',
|
|
||||||
'search': '/index.php?title=Special:Căutare&offset={offset}&{query}'
|
|
||||||
},
|
|
||||||
'tr': {
|
|
||||||
'base': 'http://archtr.org/wiki',
|
|
||||||
'search': '/index.php?title=Özel:Ara&offset={offset}&{query}'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# get base & search URLs for selected language
|
|
||||||
def get_lang_urls(language):
|
|
||||||
if language in lang_urls:
|
|
||||||
return lang_urls[language]
|
|
||||||
return lang_urls['all']
|
|
||||||
|
|
||||||
# Language names to build search requests for
|
|
||||||
# those languages which are hosted on the main site.
|
|
||||||
main_langs = {
|
|
||||||
'ar': 'العربية',
|
|
||||||
'bg': 'Български',
|
|
||||||
'cs': 'Česky',
|
|
||||||
'da': 'Dansk',
|
|
||||||
'el': 'Ελληνικά',
|
|
||||||
'es': 'Español',
|
|
||||||
'he': 'עברית',
|
|
||||||
'hr': 'Hrvatski',
|
|
||||||
'hu': 'Magyar',
|
|
||||||
'it': 'Italiano',
|
|
||||||
'ko': '한국어',
|
|
||||||
'lt': 'Lietuviškai',
|
|
||||||
'nl': 'Nederlands',
|
|
||||||
'pl': 'Polski',
|
|
||||||
'pt': 'Português',
|
|
||||||
'ru': 'Русский',
|
|
||||||
'sl': 'Slovenský',
|
|
||||||
'th': 'ไทย',
|
|
||||||
'uk': 'Українська',
|
|
||||||
'zh': '简体中文'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
# translate the locale (e.g. 'en_US') to language code ('en')
|
|
||||||
language = locale_to_lang_code(params['language'])
|
|
||||||
|
|
||||||
# if our language is hosted on the main site, we need to add its name
|
|
||||||
# to the query in order to narrow the results to that language
|
|
||||||
if language in main_langs:
|
|
||||||
query += '(' + main_langs[language] + ')'
|
|
||||||
|
|
||||||
# prepare the request parameters
|
|
||||||
query = urlencode({'search': query})
|
|
||||||
offset = (params['pageno'] - 1) * 20
|
|
||||||
|
|
||||||
# get request URLs for our language of choice
|
|
||||||
urls = get_lang_urls(language)
|
|
||||||
search_url = urls['base'] + urls['search']
|
|
||||||
|
|
||||||
params['url'] = search_url.format(query=query, offset=offset)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
# get the base URL for the language in which request was made
|
|
||||||
language = locale_to_lang_code(resp.search_params['language'])
|
|
||||||
base_url = get_lang_urls(language)['base']
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(xpath_results):
|
|
||||||
link = result.xpath(xpath_link)[0]
|
|
||||||
href = urljoin(base_url, link.attrib.get('href'))
|
|
||||||
title = escape(extract_text(link))
|
|
||||||
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,122 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
"""
|
|
||||||
BASE (Scholar publications)
|
|
||||||
|
|
||||||
@website https://base-search.net
|
|
||||||
@provide-api yes with authorization (https://api.base-search.net/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results XML
|
|
||||||
@stable ?
|
|
||||||
@parse url, title, publishedDate, content
|
|
||||||
More info on api: http://base-search.net/about/download/base_interface.pdf
|
|
||||||
"""
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from urllib import urlencode
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
from cgi import escape
|
|
||||||
from datetime import datetime
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
categories = ['science']
|
|
||||||
|
|
||||||
base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\
|
|
||||||
+ '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
paging = True
|
|
||||||
number_of_results = 10
|
|
||||||
|
|
||||||
# shortcuts for advanced search
|
|
||||||
shorcut_dict = {
|
|
||||||
# user-friendly keywords
|
|
||||||
'format:': 'dcformat:',
|
|
||||||
'author:': 'dccreator:',
|
|
||||||
'collection:': 'dccollection:',
|
|
||||||
'hdate:': 'dchdate:',
|
|
||||||
'contributor:': 'dccontributor:',
|
|
||||||
'coverage:': 'dccoverage:',
|
|
||||||
'date:': 'dcdate:',
|
|
||||||
'abstract:': 'dcdescription:',
|
|
||||||
'urls:': 'dcidentifier:',
|
|
||||||
'language:': 'dclanguage:',
|
|
||||||
'publisher:': 'dcpublisher:',
|
|
||||||
'relation:': 'dcrelation:',
|
|
||||||
'rights:': 'dcrights:',
|
|
||||||
'source:': 'dcsource:',
|
|
||||||
'subject:': 'dcsubject:',
|
|
||||||
'title:': 'dctitle:',
|
|
||||||
'type:': 'dcdctype:'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
# replace shortcuts with API advanced search keywords
|
|
||||||
for key in shorcut_dict.keys():
|
|
||||||
query = re.sub(str(key), str(shorcut_dict[key]), query)
|
|
||||||
|
|
||||||
# basic search
|
|
||||||
offset = (params['pageno'] - 1) * number_of_results
|
|
||||||
|
|
||||||
string_args = dict(query=urlencode({'query': query}),
|
|
||||||
offset=offset,
|
|
||||||
hits=number_of_results)
|
|
||||||
|
|
||||||
params['url'] = base_url.format(**string_args)
|
|
||||||
|
|
||||||
params['headers']['User-Agent'] = searx_useragent()
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = etree.XML(resp.content)
|
|
||||||
|
|
||||||
for entry in search_results.xpath('./result/doc'):
|
|
||||||
content = "No description available"
|
|
||||||
|
|
||||||
date = datetime.now() # needed in case no dcdate is available for an item
|
|
||||||
for item in entry:
|
|
||||||
if item.attrib["name"] == "dchdate":
|
|
||||||
harvestDate = item.text
|
|
||||||
|
|
||||||
elif item.attrib["name"] == "dcdate":
|
|
||||||
date = item.text
|
|
||||||
|
|
||||||
elif item.attrib["name"] == "dctitle":
|
|
||||||
title = item.text
|
|
||||||
|
|
||||||
elif item.attrib["name"] == "dclink":
|
|
||||||
url = item.text
|
|
||||||
|
|
||||||
elif item.attrib["name"] == "dcdescription":
|
|
||||||
content = escape(item.text[:300])
|
|
||||||
if len(item.text) > 300:
|
|
||||||
content += "..."
|
|
||||||
|
|
||||||
# dates returned by the BASE API are not several formats
|
|
||||||
publishedDate = None
|
|
||||||
for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']:
|
|
||||||
try:
|
|
||||||
publishedDate = datetime.strptime(date, date_format)
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if publishedDate is not None:
|
|
||||||
res_dict = {'url': url,
|
|
||||||
'title': title,
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'content': content}
|
|
||||||
else:
|
|
||||||
res_dict = {'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content}
|
|
||||||
|
|
||||||
results.append(res_dict)
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,88 +0,0 @@
|
||||||
"""
|
|
||||||
Bing (Web)
|
|
||||||
|
|
||||||
@website https://www.bing.com
|
|
||||||
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
|
|
||||||
max. 5000 query/month
|
|
||||||
|
|
||||||
@using-api no (because of query limit)
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
|
|
||||||
@todo publishedDate
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from cgi import escape
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.bing.com/'
|
|
||||||
search_string = 'search?{query}&first={offset}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10 + 1
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'en-US'
|
|
||||||
else:
|
|
||||||
language = params['language'].replace('_', '-')
|
|
||||||
|
|
||||||
search_path = search_string.format(
|
|
||||||
query=urlencode({'q': query, 'setmkt': language}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
params['cookies']['SRCHHPGUSR'] = \
|
|
||||||
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
try:
|
|
||||||
results.append({'number_of_results': int(dom.xpath('//span[@class="sb_count"]/text()')[0]
|
|
||||||
.split()[0].replace(',', ''))})
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath('//div[@class="sa_cc"]'):
|
|
||||||
link = result.xpath('.//h3/a')[0]
|
|
||||||
url = link.attrib.get('href')
|
|
||||||
title = extract_text(link)
|
|
||||||
content = escape(extract_text(result.xpath('.//p')))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# parse results again if nothing is found yet
|
|
||||||
for result in dom.xpath('//li[@class="b_algo"]'):
|
|
||||||
link = result.xpath('.//h2/a')[0]
|
|
||||||
url = link.attrib.get('href')
|
|
||||||
title = extract_text(link)
|
|
||||||
content = escape(extract_text(result.xpath('.//p')))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,98 +0,0 @@
|
||||||
"""
|
|
||||||
Bing (Images)
|
|
||||||
|
|
||||||
@website https://www.bing.com/images
|
|
||||||
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
|
|
||||||
max. 5000 query/month
|
|
||||||
|
|
||||||
@using-api no (because of query limit)
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, img_src
|
|
||||||
|
|
||||||
@todo currently there are up to 35 images receive per page,
|
|
||||||
because bing does not parse count=10.
|
|
||||||
limited response to 10 images
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from json import loads
|
|
||||||
import re
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
safesearch = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.bing.com/'
|
|
||||||
search_string = 'images/search?{query}&count=10&first={offset}'
|
|
||||||
thumb_url = "https://www.bing.com/th?id={ihk}"
|
|
||||||
|
|
||||||
# safesearch definitions
|
|
||||||
safesearch_types = {2: 'STRICT',
|
|
||||||
1: 'DEMOTE',
|
|
||||||
0: 'OFF'}
|
|
||||||
|
|
||||||
|
|
||||||
_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10 + 1
|
|
||||||
|
|
||||||
# required for cookie
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'en-US'
|
|
||||||
else:
|
|
||||||
language = params['language'].replace('_', '-')
|
|
||||||
|
|
||||||
search_path = search_string.format(
|
|
||||||
query=urlencode({'q': query}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
params['cookies']['SRCHHPGUSR'] = \
|
|
||||||
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
|
|
||||||
'&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath('//div[@class="dg_u"]/div'):
|
|
||||||
link = result.xpath('./a')[0]
|
|
||||||
|
|
||||||
# parse json-data (it is required to add a space, to make it parsable)
|
|
||||||
json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('m')))
|
|
||||||
|
|
||||||
title = link.attrib.get('t1')
|
|
||||||
ihk = link.attrib.get('ihk')
|
|
||||||
|
|
||||||
# url = 'http://' + link.attrib.get('t3')
|
|
||||||
url = json_data.get('surl')
|
|
||||||
img_src = json_data.get('imgurl')
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'template': 'images.html',
|
|
||||||
'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': '',
|
|
||||||
'thumbnail_src': thumb_url.format(ihk=ihk),
|
|
||||||
'img_src': img_src})
|
|
||||||
|
|
||||||
# TODO stop parsing if 10 images are found
|
|
||||||
if len(results) >= 10:
|
|
||||||
break
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,111 +0,0 @@
|
||||||
"""
|
|
||||||
Bing (News)
|
|
||||||
|
|
||||||
@website https://www.bing.com/news
|
|
||||||
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
|
|
||||||
max. 5000 query/month
|
|
||||||
|
|
||||||
@using-api no (because of query limit)
|
|
||||||
@results RSS (using search portal)
|
|
||||||
@stable yes (except perhaps for the images)
|
|
||||||
@parse url, title, content, publishedDate, thumbnail
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, parse_qsl
|
|
||||||
from datetime import datetime
|
|
||||||
from dateutil import parser
|
|
||||||
from lxml import etree
|
|
||||||
from searx.utils import list_get
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['news']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.bing.com/'
|
|
||||||
search_string = 'news/search?{query}&first={offset}&format=RSS'
|
|
||||||
|
|
||||||
|
|
||||||
# remove click
|
|
||||||
def url_cleanup(url_string):
|
|
||||||
parsed_url = urlparse(url_string)
|
|
||||||
if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx':
|
|
||||||
query = dict(parse_qsl(parsed_url.query))
|
|
||||||
return query.get('url', None)
|
|
||||||
return url_string
|
|
||||||
|
|
||||||
|
|
||||||
# replace the http://*bing4.com/th?id=... by https://www.bing.com/th?id=...
|
|
||||||
def image_url_cleanup(url_string):
|
|
||||||
parsed_url = urlparse(url_string)
|
|
||||||
if parsed_url.netloc.endswith('bing4.com') and parsed_url.path == '/th':
|
|
||||||
query = dict(parse_qsl(parsed_url.query))
|
|
||||||
return "https://www.bing.com/th?id=" + query.get('id')
|
|
||||||
return url_string
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10 + 1
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'en-US'
|
|
||||||
else:
|
|
||||||
language = params['language'].replace('_', '-')
|
|
||||||
|
|
||||||
search_path = search_string.format(
|
|
||||||
query=urlencode({'q': query, 'setmkt': language}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
rss = etree.fromstring(resp.content)
|
|
||||||
|
|
||||||
ns = rss.nsmap
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for item in rss.xpath('./channel/item'):
|
|
||||||
# url / title / content
|
|
||||||
url = url_cleanup(item.xpath('./link/text()')[0])
|
|
||||||
title = list_get(item.xpath('./title/text()'), 0, url)
|
|
||||||
content = list_get(item.xpath('./description/text()'), 0, '')
|
|
||||||
|
|
||||||
# publishedDate
|
|
||||||
publishedDate = list_get(item.xpath('./pubDate/text()'), 0)
|
|
||||||
try:
|
|
||||||
publishedDate = parser.parse(publishedDate, dayfirst=False)
|
|
||||||
except TypeError:
|
|
||||||
publishedDate = datetime.now()
|
|
||||||
except ValueError:
|
|
||||||
publishedDate = datetime.now()
|
|
||||||
|
|
||||||
# thumbnail
|
|
||||||
thumbnail = list_get(item.xpath('./News:Image/text()', namespaces=ns), 0)
|
|
||||||
if thumbnail is not None:
|
|
||||||
thumbnail = image_url_cleanup(thumbnail)
|
|
||||||
|
|
||||||
# append result
|
|
||||||
if thumbnail is not None:
|
|
||||||
results.append({'template': 'videos.html',
|
|
||||||
'url': url,
|
|
||||||
'title': title,
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'content': content,
|
|
||||||
'thumbnail': thumbnail})
|
|
||||||
else:
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,70 +0,0 @@
|
||||||
"""
|
|
||||||
Blekko (Images)
|
|
||||||
|
|
||||||
@website https://blekko.com
|
|
||||||
@provide-api yes (inofficial)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, img_src
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
safesearch = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://blekko.com'
|
|
||||||
search_url = '/api/images?{query}&c={c}'
|
|
||||||
|
|
||||||
# safesearch definitions
|
|
||||||
safesearch_types = {2: '1',
|
|
||||||
1: '',
|
|
||||||
0: '0'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
c = (params['pageno'] - 1) * 48
|
|
||||||
|
|
||||||
params['url'] = base_url +\
|
|
||||||
search_url.format(query=urlencode({'q': query}),
|
|
||||||
c=c)
|
|
||||||
|
|
||||||
if params['pageno'] != 1:
|
|
||||||
params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1))
|
|
||||||
|
|
||||||
# let Blekko know we wan't have profiling
|
|
||||||
params['cookies']['tag_lesslogging'] = '1'
|
|
||||||
|
|
||||||
# parse safesearch argument
|
|
||||||
params['cookies']['safesearch'] = safesearch_types.get(params['safesearch'], '')
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if not search_results:
|
|
||||||
return []
|
|
||||||
|
|
||||||
for result in search_results:
|
|
||||||
# append result
|
|
||||||
results.append({'url': result['page_url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'content': '',
|
|
||||||
'img_src': result['url'],
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,106 +0,0 @@
|
||||||
"""
|
|
||||||
BTDigg (Videos, Music, Files)
|
|
||||||
|
|
||||||
@website https://btdigg.org
|
|
||||||
@provide-api yes (on demand)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content, seed, leech, magnetlink
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
|
||||||
from operator import itemgetter
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos', 'music', 'files']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://btdigg.org'
|
|
||||||
search_url = url + '/search?q={search_term}&p={pageno}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(search_term=quote(query),
|
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.content)
|
|
||||||
|
|
||||||
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
|
|
||||||
|
|
||||||
# return empty array if nothing is found
|
|
||||||
if not search_res:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res:
|
|
||||||
link = result.xpath('.//td[@class="torrent_name"]//a')[0]
|
|
||||||
href = urljoin(url, link.attrib.get('href'))
|
|
||||||
title = escape(extract_text(link))
|
|
||||||
content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
|
|
||||||
content = "<br />".join(content.split("\n"))
|
|
||||||
|
|
||||||
filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
|
|
||||||
filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
|
|
||||||
files = result.xpath('.//span[@class="attr_val"]/text()')[1]
|
|
||||||
seed = result.xpath('.//span[@class="attr_val"]/text()')[2]
|
|
||||||
|
|
||||||
# convert seed to int if possible
|
|
||||||
if seed.isdigit():
|
|
||||||
seed = int(seed)
|
|
||||||
else:
|
|
||||||
seed = 0
|
|
||||||
|
|
||||||
leech = 0
|
|
||||||
|
|
||||||
# convert filesize to byte if possible
|
|
||||||
try:
|
|
||||||
filesize = float(filesize)
|
|
||||||
|
|
||||||
# convert filesize to byte
|
|
||||||
if filesize_multiplier == 'TB':
|
|
||||||
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'GB':
|
|
||||||
filesize = int(filesize * 1024 * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'MB':
|
|
||||||
filesize = int(filesize * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'KB':
|
|
||||||
filesize = int(filesize * 1024)
|
|
||||||
except:
|
|
||||||
filesize = None
|
|
||||||
|
|
||||||
# convert files to int if possible
|
|
||||||
if files.isdigit():
|
|
||||||
files = int(files)
|
|
||||||
else:
|
|
||||||
files = None
|
|
||||||
|
|
||||||
magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'seed': seed,
|
|
||||||
'leech': leech,
|
|
||||||
'filesize': filesize,
|
|
||||||
'files': files,
|
|
||||||
'magnetlink': magnetlink,
|
|
||||||
'template': 'torrent.html'})
|
|
||||||
|
|
||||||
# return results sorted by seeder
|
|
||||||
return sorted(results, key=itemgetter('seed'), reverse=True)
|
|
|
@ -1,101 +0,0 @@
|
||||||
from datetime import datetime
|
|
||||||
import re
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import unicodedata
|
|
||||||
|
|
||||||
|
|
||||||
categories = []
|
|
||||||
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
|
|
||||||
weight = 100
|
|
||||||
|
|
||||||
parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
|
|
||||||
|
|
||||||
db = 1
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_name(name):
|
|
||||||
name = name.lower().replace('-', ' ').rstrip('s')
|
|
||||||
name = re.sub(' +', ' ', name)
|
|
||||||
return unicodedata.normalize('NFKD', name).lower()
|
|
||||||
|
|
||||||
|
|
||||||
def name_to_iso4217(name):
|
|
||||||
global db
|
|
||||||
|
|
||||||
name = normalize_name(name)
|
|
||||||
currencies = db['names'].get(name, [name])
|
|
||||||
return currencies[0]
|
|
||||||
|
|
||||||
|
|
||||||
def iso4217_to_name(iso4217, language):
|
|
||||||
global db
|
|
||||||
|
|
||||||
return db['iso4217'].get(iso4217, {}).get(language, iso4217)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
m = parser_re.match(unicode(query, 'utf8'))
|
|
||||||
if not m:
|
|
||||||
# wrong query
|
|
||||||
return params
|
|
||||||
|
|
||||||
ammount, from_currency, to_currency = m.groups()
|
|
||||||
ammount = float(ammount)
|
|
||||||
from_currency = name_to_iso4217(from_currency.strip())
|
|
||||||
to_currency = name_to_iso4217(to_currency.strip())
|
|
||||||
|
|
||||||
q = (from_currency + to_currency).upper()
|
|
||||||
|
|
||||||
params['url'] = url.format(query=q)
|
|
||||||
params['ammount'] = ammount
|
|
||||||
params['from'] = from_currency
|
|
||||||
params['to'] = to_currency
|
|
||||||
params['from_name'] = iso4217_to_name(from_currency, 'en')
|
|
||||||
params['to_name'] = iso4217_to_name(to_currency, 'en')
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
try:
|
|
||||||
_, conversion_rate, _ = resp.text.split(',', 2)
|
|
||||||
conversion_rate = float(conversion_rate)
|
|
||||||
except:
|
|
||||||
return results
|
|
||||||
|
|
||||||
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
|
|
||||||
resp.search_params['ammount'],
|
|
||||||
resp.search_params['from'],
|
|
||||||
resp.search_params['ammount'] * conversion_rate,
|
|
||||||
resp.search_params['to'],
|
|
||||||
conversion_rate,
|
|
||||||
resp.search_params['from_name'],
|
|
||||||
resp.search_params['to_name'],
|
|
||||||
)
|
|
||||||
|
|
||||||
now_date = datetime.now().strftime('%Y%m%d')
|
|
||||||
url = 'https://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa
|
|
||||||
url = url.format(
|
|
||||||
now_date,
|
|
||||||
resp.search_params['ammount'],
|
|
||||||
resp.search_params['from'].lower(),
|
|
||||||
resp.search_params['to'].lower()
|
|
||||||
)
|
|
||||||
|
|
||||||
results.append({'answer': answer, 'url': url})
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def load():
|
|
||||||
global db
|
|
||||||
|
|
||||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
json_data = open(current_dir + "/../data/currencies.json").read()
|
|
||||||
|
|
||||||
db = json.loads(json_data)
|
|
||||||
|
|
||||||
|
|
||||||
load()
|
|
|
@ -1,77 +0,0 @@
|
||||||
"""
|
|
||||||
Dailymotion (Videos)
|
|
||||||
|
|
||||||
@website https://www.dailymotion.com
|
|
||||||
@provide-api yes (http://www.dailymotion.com/developer)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, thumbnail, publishedDate, embedded
|
|
||||||
|
|
||||||
@todo set content-parameter with correct data
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from cgi import escape
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
# see http://www.dailymotion.com/doc/api/obj-video.html
|
|
||||||
search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa
|
|
||||||
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
|
|
||||||
'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
if params['language'] == 'all':
|
|
||||||
locale = 'en-US'
|
|
||||||
else:
|
|
||||||
locale = params['language']
|
|
||||||
|
|
||||||
params['url'] = search_url.format(
|
|
||||||
query=urlencode({'search': query, 'localization': locale}),
|
|
||||||
pageno=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if 'list' not in search_res:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for res in search_res['list']:
|
|
||||||
title = res['title']
|
|
||||||
url = res['url']
|
|
||||||
content = escape(res['description'])
|
|
||||||
thumbnail = res['thumbnail_360_url']
|
|
||||||
publishedDate = datetime.fromtimestamp(res['created_time'], None)
|
|
||||||
embedded = embedded_url.format(videoid=res['id'])
|
|
||||||
|
|
||||||
# http to https
|
|
||||||
thumbnail = thumbnail.replace("http://", "https://")
|
|
||||||
|
|
||||||
results.append({'template': 'videos.html',
|
|
||||||
'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'embedded': embedded,
|
|
||||||
'thumbnail': thumbnail})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,67 +0,0 @@
|
||||||
"""
|
|
||||||
Deezer (Music)
|
|
||||||
|
|
||||||
@website https://deezer.com
|
|
||||||
@provide-api yes (http://developers.deezer.com/api/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, embedded
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.deezer.com/'
|
|
||||||
search_url = url + 'search?{query}&index={offset}'
|
|
||||||
|
|
||||||
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
|
|
||||||
'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\
|
|
||||||
'width="540" height="80"></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 25
|
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res.get('data', []):
|
|
||||||
if result['type'] == 'track':
|
|
||||||
title = result['title']
|
|
||||||
url = result['link']
|
|
||||||
|
|
||||||
if url.startswith('http://'):
|
|
||||||
url = 'https' + url[4:]
|
|
||||||
|
|
||||||
content = result['artist']['name'] +\
|
|
||||||
" • " +\
|
|
||||||
result['album']['title'] +\
|
|
||||||
" • " + result['title']
|
|
||||||
embedded = embedded_url.format(audioid=result['id'])
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'embedded': embedded,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,75 +0,0 @@
|
||||||
"""
|
|
||||||
Deviantart (Images)
|
|
||||||
|
|
||||||
@website https://www.deviantart.com/
|
|
||||||
@provide-api yes (https://www.deviantart.com/developers/) (RSS)
|
|
||||||
|
|
||||||
@using-api no (TODO, rewrite to api)
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, thumbnail_src, img_src
|
|
||||||
|
|
||||||
@todo rewrite to api
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urljoin
|
|
||||||
from lxml import html
|
|
||||||
import re
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.deviantart.com/'
|
|
||||||
search_url = base_url + 'browse/all/?offset={offset}&{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 24
|
|
||||||
|
|
||||||
params['url'] = search_url.format(offset=offset,
|
|
||||||
query=urlencode({'q': query}))
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# return empty array if a redirection code is returned
|
|
||||||
if resp.status_code == 302:
|
|
||||||
return []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
regex = re.compile('\/200H\/')
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
|
|
||||||
link = result.xpath('.//a[contains(@class, "thumb")]')[0]
|
|
||||||
url = urljoin(base_url, link.attrib.get('href'))
|
|
||||||
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
|
|
||||||
title = extract_text(title_links[0])
|
|
||||||
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
|
|
||||||
img_src = regex.sub('/', thumbnail_src)
|
|
||||||
|
|
||||||
# http to https, remove domain sharding
|
|
||||||
thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
|
|
||||||
thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
|
|
||||||
|
|
||||||
url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'img_src': img_src,
|
|
||||||
'thumbnail_src': thumbnail_src,
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,75 +0,0 @@
|
||||||
"""
|
|
||||||
Digg (News, Social media)
|
|
||||||
|
|
||||||
@website https://digg.com/
|
|
||||||
@provide-api no
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content, publishedDate, thumbnail
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import quote_plus
|
|
||||||
from json import loads
|
|
||||||
from lxml import html
|
|
||||||
from cgi import escape
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['news', 'social media']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://digg.com/'
|
|
||||||
search_url = base_url + 'api/search/{query}.json?position={position}&format=html'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//article'
|
|
||||||
link_xpath = './/small[@class="time"]//a'
|
|
||||||
title_xpath = './/h2//a//text()'
|
|
||||||
content_xpath = './/p//text()'
|
|
||||||
pubdate_xpath = './/time'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
|
||||||
params['url'] = search_url.format(position=offset,
|
|
||||||
query=quote_plus(query))
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_result = loads(resp.text)
|
|
||||||
|
|
||||||
if 'html' not in search_result or search_result['html'] == '':
|
|
||||||
return results
|
|
||||||
|
|
||||||
dom = html.fromstring(search_result['html'])
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
url = result.attrib.get('data-contenturl')
|
|
||||||
thumbnail = result.xpath('.//img')[0].attrib.get('src')
|
|
||||||
title = ''.join(result.xpath(title_xpath))
|
|
||||||
content = escape(''.join(result.xpath(content_xpath)))
|
|
||||||
pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
|
|
||||||
publishedDate = parser.parse(pubdate)
|
|
||||||
|
|
||||||
# http to https
|
|
||||||
thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'template': 'videos.html',
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'thumbnail': thumbnail})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,84 +0,0 @@
|
||||||
# Doku Wiki
|
|
||||||
#
|
|
||||||
# @website https://www.dokuwiki.org/
|
|
||||||
# @provide-api yes
|
|
||||||
# (https://www.dokuwiki.org/devel:xmlrpc)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results HTML
|
|
||||||
# @stable yes
|
|
||||||
# @parse (general) url, title, content
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.html import fromstring
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
|
|
||||||
paging = False
|
|
||||||
language_support = False
|
|
||||||
number_of_results = 5
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
# Doku is OpenSearch compatible
|
|
||||||
base_url = 'http://localhost:8090'
|
|
||||||
search_url = '/?do=search'\
|
|
||||||
'&{query}'
|
|
||||||
# TODO '&startRecord={offset}'\
|
|
||||||
# TODO '&maximumRecords={limit}'\
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
|
|
||||||
params['url'] = base_url +\
|
|
||||||
search_url.format(query=urlencode({'id': query}))
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
doc = fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
# Quickhits
|
|
||||||
for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
|
|
||||||
try:
|
|
||||||
res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not res_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'title': title,
|
|
||||||
'content': "",
|
|
||||||
'url': base_url + res_url})
|
|
||||||
|
|
||||||
# Search results
|
|
||||||
for r in doc.xpath('//dl[@class="search_results"]/*'):
|
|
||||||
try:
|
|
||||||
if r.tag == "dt":
|
|
||||||
res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
|
|
||||||
title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
|
|
||||||
elif r.tag == "dd":
|
|
||||||
content = extract_text(r.xpath('.'))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'title': title,
|
|
||||||
'content': content,
|
|
||||||
'url': base_url + res_url})
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not res_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,78 +0,0 @@
|
||||||
"""
|
|
||||||
DuckDuckGo (Web)
|
|
||||||
|
|
||||||
@website https://duckduckgo.com/
|
|
||||||
@provide-api yes (https://duckduckgo.com/api),
|
|
||||||
but not all results from search-site
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
|
|
||||||
@todo rewrite to api
|
|
||||||
@todo language support
|
|
||||||
(the current used site does not support language-change)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.html import fromstring
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://duckduckgo.com/html?{query}&s={offset}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
result_xpath = '//div[@class="result results_links results_links_deep web-result "]' # noqa
|
|
||||||
url_xpath = './/a[@class="result__a"]/@href'
|
|
||||||
title_xpath = './/a[@class="result__a"]'
|
|
||||||
content_xpath = './/a[@class="result__snippet"]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 30
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
locale = 'en-us'
|
|
||||||
else:
|
|
||||||
locale = params['language'].replace('_', '-').lower()
|
|
||||||
|
|
||||||
params['url'] = url.format(
|
|
||||||
query=urlencode({'q': query, 'kl': locale}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
doc = fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for r in doc.xpath(result_xpath):
|
|
||||||
try:
|
|
||||||
res_url = r.xpath(url_xpath)[-1]
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not res_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = extract_text(r.xpath(title_xpath))
|
|
||||||
content = extract_text(r.xpath(content_xpath))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'title': title,
|
|
||||||
'content': content,
|
|
||||||
'url': res_url})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,156 +0,0 @@
|
||||||
import json
|
|
||||||
from urllib import urlencode
|
|
||||||
from re import compile, sub
|
|
||||||
from lxml import html
|
|
||||||
from searx.utils import html_to_text
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
url = 'https://api.duckduckgo.com/'\
|
|
||||||
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
|
||||||
|
|
||||||
http_regex = compile(r'^http:')
|
|
||||||
|
|
||||||
|
|
||||||
def result_to_text(url, text, htmlResult):
|
|
||||||
# TODO : remove result ending with "Meaning" or "Category"
|
|
||||||
dom = html.fromstring(htmlResult)
|
|
||||||
a = dom.xpath('//a')
|
|
||||||
if len(a) >= 1:
|
|
||||||
return extract_text(a[0])
|
|
||||||
else:
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = url.format(query=urlencode({'q': query}))
|
|
||||||
params['headers']['Accept-Language'] = params['language']
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = json.loads(resp.text)
|
|
||||||
|
|
||||||
content = ''
|
|
||||||
heading = search_res.get('Heading', '')
|
|
||||||
attributes = []
|
|
||||||
urls = []
|
|
||||||
infobox_id = None
|
|
||||||
relatedTopics = []
|
|
||||||
|
|
||||||
# add answer if there is one
|
|
||||||
answer = search_res.get('Answer', '')
|
|
||||||
if answer != '':
|
|
||||||
results.append({'answer': html_to_text(answer)})
|
|
||||||
|
|
||||||
# add infobox
|
|
||||||
if 'Definition' in search_res:
|
|
||||||
content = content + search_res.get('Definition', '')
|
|
||||||
|
|
||||||
if 'Abstract' in search_res:
|
|
||||||
content = content + search_res.get('Abstract', '')
|
|
||||||
|
|
||||||
# image
|
|
||||||
image = search_res.get('Image', '')
|
|
||||||
image = None if image == '' else image
|
|
||||||
|
|
||||||
# attributes
|
|
||||||
if 'Infobox' in search_res:
|
|
||||||
infobox = search_res.get('Infobox', None)
|
|
||||||
if 'content' in infobox:
|
|
||||||
for info in infobox.get('content'):
|
|
||||||
attributes.append({'label': info.get('label'),
|
|
||||||
'value': info.get('value')})
|
|
||||||
|
|
||||||
# urls
|
|
||||||
for ddg_result in search_res.get('Results', []):
|
|
||||||
if 'FirstURL' in ddg_result:
|
|
||||||
firstURL = ddg_result.get('FirstURL', '')
|
|
||||||
text = ddg_result.get('Text', '')
|
|
||||||
urls.append({'title': text, 'url': firstURL})
|
|
||||||
results.append({'title': heading, 'url': firstURL})
|
|
||||||
|
|
||||||
# related topics
|
|
||||||
for ddg_result in search_res.get('RelatedTopics', []):
|
|
||||||
if 'FirstURL' in ddg_result:
|
|
||||||
suggestion = result_to_text(ddg_result.get('FirstURL', None),
|
|
||||||
ddg_result.get('Text', None),
|
|
||||||
ddg_result.get('Result', None))
|
|
||||||
if suggestion != heading:
|
|
||||||
results.append({'suggestion': suggestion})
|
|
||||||
elif 'Topics' in ddg_result:
|
|
||||||
suggestions = []
|
|
||||||
relatedTopics.append({'name': ddg_result.get('Name', ''),
|
|
||||||
'suggestions': suggestions})
|
|
||||||
for topic_result in ddg_result.get('Topics', []):
|
|
||||||
suggestion = result_to_text(topic_result.get('FirstURL', None),
|
|
||||||
topic_result.get('Text', None),
|
|
||||||
topic_result.get('Result', None))
|
|
||||||
if suggestion != heading:
|
|
||||||
suggestions.append(suggestion)
|
|
||||||
|
|
||||||
# abstract
|
|
||||||
abstractURL = search_res.get('AbstractURL', '')
|
|
||||||
if abstractURL != '':
|
|
||||||
# add as result ? problem always in english
|
|
||||||
infobox_id = abstractURL
|
|
||||||
urls.append({'title': search_res.get('AbstractSource'),
|
|
||||||
'url': abstractURL})
|
|
||||||
|
|
||||||
# definition
|
|
||||||
definitionURL = search_res.get('DefinitionURL', '')
|
|
||||||
if definitionURL != '':
|
|
||||||
# add as result ? as answer ? problem always in english
|
|
||||||
infobox_id = definitionURL
|
|
||||||
urls.append({'title': search_res.get('DefinitionSource'),
|
|
||||||
'url': definitionURL})
|
|
||||||
|
|
||||||
# to merge with wikidata's infobox
|
|
||||||
if infobox_id:
|
|
||||||
infobox_id = http_regex.sub('https:', infobox_id)
|
|
||||||
|
|
||||||
# entity
|
|
||||||
entity = search_res.get('Entity', None)
|
|
||||||
# TODO continent / country / department / location / waterfall /
|
|
||||||
# mountain range :
|
|
||||||
# link to map search, get weather, near by locations
|
|
||||||
# TODO musician : link to music search
|
|
||||||
# TODO concert tour : ??
|
|
||||||
# TODO film / actor / television / media franchise :
|
|
||||||
# links to IMDB / rottentomatoes (or scrap result)
|
|
||||||
# TODO music : link tu musicbrainz / last.fm
|
|
||||||
# TODO book : ??
|
|
||||||
# TODO artist / playwright : ??
|
|
||||||
# TODO compagny : ??
|
|
||||||
# TODO software / os : ??
|
|
||||||
# TODO software engineer : ??
|
|
||||||
# TODO prepared food : ??
|
|
||||||
# TODO website : ??
|
|
||||||
# TODO performing art : ??
|
|
||||||
# TODO prepared food : ??
|
|
||||||
# TODO programming language : ??
|
|
||||||
# TODO file format : ??
|
|
||||||
|
|
||||||
if len(heading) > 0:
|
|
||||||
# TODO get infobox.meta.value where .label='article_title'
|
|
||||||
if image is None and len(attributes) == 0 and len(urls) == 1 and\
|
|
||||||
len(relatedTopics) == 0 and len(content) == 0:
|
|
||||||
results.append({
|
|
||||||
'url': urls[0]['url'],
|
|
||||||
'title': heading,
|
|
||||||
'content': content
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
results.append({
|
|
||||||
'infobox': heading,
|
|
||||||
'id': infobox_id,
|
|
||||||
'entity': entity,
|
|
||||||
'content': content,
|
|
||||||
'img_src': image,
|
|
||||||
'attributes': attributes,
|
|
||||||
'urls': urls,
|
|
||||||
'relatedTopics': relatedTopics
|
|
||||||
})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,16 +0,0 @@
|
||||||
"""
|
|
||||||
Dummy
|
|
||||||
|
|
||||||
@results empty array
|
|
||||||
@stable yes
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
return []
|
|
|
@ -1,116 +0,0 @@
|
||||||
"""
|
|
||||||
Faroo (Web, News)
|
|
||||||
|
|
||||||
@website http://www.faroo.com
|
|
||||||
@provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, publishedDate, img_src
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
import datetime
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general', 'news']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
number_of_results = 10
|
|
||||||
api_key = None
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'http://www.faroo.com/'
|
|
||||||
search_url = url + 'api?{query}'\
|
|
||||||
'&start={offset}'\
|
|
||||||
'&length={number_of_results}'\
|
|
||||||
'&l={language}'\
|
|
||||||
'&src={categorie}'\
|
|
||||||
'&i=false'\
|
|
||||||
'&f=json'\
|
|
||||||
'&key={api_key}' # noqa
|
|
||||||
|
|
||||||
search_category = {'general': 'web',
|
|
||||||
'news': 'news'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * number_of_results + 1
|
|
||||||
categorie = search_category.get(params['category'], 'web')
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'en'
|
|
||||||
else:
|
|
||||||
language = params['language'].split('_')[0]
|
|
||||||
|
|
||||||
# if language is not supported, put it in english
|
|
||||||
if language != 'en' and\
|
|
||||||
language != 'de' and\
|
|
||||||
language != 'zh':
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
params['url'] = search_url.format(offset=offset,
|
|
||||||
number_of_results=number_of_results,
|
|
||||||
query=urlencode({'q': query}),
|
|
||||||
language=language,
|
|
||||||
categorie=categorie,
|
|
||||||
api_key=api_key)
|
|
||||||
|
|
||||||
# using searx User-Agent
|
|
||||||
params['headers']['User-Agent'] = searx_useragent()
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
# HTTP-Code 401: api-key is not valide
|
|
||||||
if resp.status_code == 401:
|
|
||||||
raise Exception("API key is not valide")
|
|
||||||
|
|
||||||
# HTTP-Code 429: rate limit exceeded
|
|
||||||
if resp.status_code == 429:
|
|
||||||
raise Exception("rate limit has been exceeded!")
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if not search_res.get('results', {}):
|
|
||||||
return []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res['results']:
|
|
||||||
if result['news']:
|
|
||||||
# timestamp (milliseconds since 1970)
|
|
||||||
publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0) # noqa
|
|
||||||
|
|
||||||
# append news result
|
|
||||||
results.append({'url': result['url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'content': result['kwic']})
|
|
||||||
|
|
||||||
else:
|
|
||||||
# append general result
|
|
||||||
# TODO, publishedDate correct?
|
|
||||||
results.append({'url': result['url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'content': result['kwic']})
|
|
||||||
|
|
||||||
# append image result if image url is set
|
|
||||||
# TODO, show results with an image like in faroo
|
|
||||||
if result['iurl']:
|
|
||||||
results.append({'template': 'images.html',
|
|
||||||
'url': result['url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'content': result['kwic'],
|
|
||||||
'img_src': result['iurl']})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,53 +0,0 @@
|
||||||
"""
|
|
||||||
F-Droid (a repository of FOSS applications for Android)
|
|
||||||
|
|
||||||
@website https://f-droid.org/
|
|
||||||
@provide-api no
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://f-droid.org/'
|
|
||||||
search_url = base_url + 'repository/browse/?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
query = urlencode({'fdfilter': query,
|
|
||||||
'fdpage': params['pageno']})
|
|
||||||
params['url'] = search_url.format(query=query)
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
for app in dom.xpath('//div[@id="appheader"]'):
|
|
||||||
url = app.xpath('./ancestor::a/@href')[0]
|
|
||||||
title = app.xpath('./p/span/text()')[0]
|
|
||||||
img_src = app.xpath('.//img/@src')[0]
|
|
||||||
|
|
||||||
content = extract_text(app.xpath('./p')[0])
|
|
||||||
content = escape(content.replace(title, '', 1).strip())
|
|
||||||
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'img_src': img_src})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,84 +0,0 @@
|
||||||
from urllib import urlencode
|
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
|
|
||||||
url = 'http://www.filecrop.com/'
|
|
||||||
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
|
|
||||||
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
|
|
||||||
class FilecropResultParser(HTMLParser):
|
|
||||||
def __init__(self):
|
|
||||||
HTMLParser.__init__(self)
|
|
||||||
self.__start_processing = False
|
|
||||||
|
|
||||||
self.results = []
|
|
||||||
self.result = {}
|
|
||||||
|
|
||||||
self.tr_counter = 0
|
|
||||||
self.data_counter = 0
|
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
|
||||||
|
|
||||||
if tag == 'tr':
|
|
||||||
if ('bgcolor', '#edeff5') in attrs or\
|
|
||||||
('bgcolor', '#ffffff') in attrs:
|
|
||||||
self.__start_processing = True
|
|
||||||
|
|
||||||
if not self.__start_processing:
|
|
||||||
return
|
|
||||||
|
|
||||||
if tag == 'label':
|
|
||||||
self.result['title'] = [attr[1] for attr in attrs
|
|
||||||
if attr[0] == 'title'][0]
|
|
||||||
elif tag == 'a' and ('rel', 'nofollow') in attrs\
|
|
||||||
and ('class', 'sourcelink') in attrs:
|
|
||||||
if 'content' in self.result:
|
|
||||||
self.result['content'] += [attr[1] for attr in attrs
|
|
||||||
if attr[0] == 'title'][0]
|
|
||||||
else:
|
|
||||||
self.result['content'] = [attr[1] for attr in attrs
|
|
||||||
if attr[0] == 'title'][0]
|
|
||||||
self.result['content'] += ' '
|
|
||||||
elif tag == 'a':
|
|
||||||
self.result['url'] = url + [attr[1] for attr in attrs
|
|
||||||
if attr[0] == 'href'][0]
|
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
|
||||||
if self.__start_processing is False:
|
|
||||||
return
|
|
||||||
|
|
||||||
if tag == 'tr':
|
|
||||||
self.tr_counter += 1
|
|
||||||
|
|
||||||
if self.tr_counter == 2:
|
|
||||||
self.__start_processing = False
|
|
||||||
self.tr_counter = 0
|
|
||||||
self.data_counter = 0
|
|
||||||
self.results.append(self.result)
|
|
||||||
self.result = {}
|
|
||||||
|
|
||||||
def handle_data(self, data):
|
|
||||||
if not self.__start_processing:
|
|
||||||
return
|
|
||||||
|
|
||||||
if 'content' in self.result:
|
|
||||||
self.result['content'] += data + ' '
|
|
||||||
else:
|
|
||||||
self.result['content'] = data + ' '
|
|
||||||
|
|
||||||
self.data_counter += 1
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
index = 1 + (params['pageno'] - 1) * 30
|
|
||||||
params['url'] = search_url.format(query=urlencode({'w': query}),
|
|
||||||
index=index)
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
parser = FilecropResultParser()
|
|
||||||
parser.feed(resp.text)
|
|
||||||
|
|
||||||
return parser.results
|
|
|
@ -1,98 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
"""
|
|
||||||
Flickr (Images)
|
|
||||||
|
|
||||||
@website https://www.flickr.com
|
|
||||||
@provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, thumbnail, img_src
|
|
||||||
More info on api-key : https://www.flickr.com/services/apps/create/
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
|
|
||||||
categories = ['images']
|
|
||||||
|
|
||||||
nb_per_page = 15
|
|
||||||
paging = True
|
|
||||||
api_key = None
|
|
||||||
|
|
||||||
|
|
||||||
url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
|
|
||||||
'&api_key={api_key}&{text}&sort=relevance' +\
|
|
||||||
'&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\
|
|
||||||
'&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
|
|
||||||
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
|
|
||||||
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
|
|
||||||
def build_flickr_url(user_id, photo_id):
|
|
||||||
return photo_url.format(userid=user_id, photoid=photo_id)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = url.format(text=urlencode({'text': query}),
|
|
||||||
api_key=api_key,
|
|
||||||
nb_per_page=nb_per_page,
|
|
||||||
page=params['pageno'])
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if 'photos' not in search_results:
|
|
||||||
return []
|
|
||||||
|
|
||||||
if 'photo' not in search_results['photos']:
|
|
||||||
return []
|
|
||||||
|
|
||||||
photos = search_results['photos']['photo']
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for photo in photos:
|
|
||||||
if 'url_o' in photo:
|
|
||||||
img_src = photo['url_o']
|
|
||||||
elif 'url_z' in photo:
|
|
||||||
img_src = photo['url_z']
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# For a bigger thumbnail, keep only the url_z, not the url_n
|
|
||||||
if 'url_n' in photo:
|
|
||||||
thumbnail_src = photo['url_n']
|
|
||||||
elif 'url_z' in photo:
|
|
||||||
thumbnail_src = photo['url_z']
|
|
||||||
else:
|
|
||||||
thumbnail_src = img_src
|
|
||||||
|
|
||||||
url = build_flickr_url(photo['owner'], photo['id'])
|
|
||||||
|
|
||||||
title = photo['title']
|
|
||||||
|
|
||||||
content = '<span class="photo-author">' +\
|
|
||||||
photo['ownername'] +\
|
|
||||||
'</span><br />' +\
|
|
||||||
'<span class="description">' +\
|
|
||||||
photo['description']['_content'] +\
|
|
||||||
'</span>'
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'img_src': img_src,
|
|
||||||
'thumbnail_src': thumbnail_src,
|
|
||||||
'content': content,
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,106 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
"""
|
|
||||||
Flickr (Images)
|
|
||||||
|
|
||||||
@website https://www.flickr.com
|
|
||||||
@provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no
|
|
||||||
@parse url, title, thumbnail, img_src
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
import re
|
|
||||||
from searx.engines import logger
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('flickr-noapi')
|
|
||||||
|
|
||||||
categories = ['images']
|
|
||||||
|
|
||||||
url = 'https://www.flickr.com/'
|
|
||||||
search_url = url + 'search?{query}&page={page}'
|
|
||||||
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
|
|
||||||
regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
|
|
||||||
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
|
|
||||||
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
|
|
||||||
def build_flickr_url(user_id, photo_id):
|
|
||||||
return photo_url.format(userid=user_id, photoid=photo_id)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=urlencode({'text': query}),
|
|
||||||
page=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
matches = regex.search(resp.text)
|
|
||||||
|
|
||||||
if matches is None:
|
|
||||||
return results
|
|
||||||
|
|
||||||
match = matches.group(1)
|
|
||||||
search_results = loads(match)
|
|
||||||
|
|
||||||
if '_data' not in search_results:
|
|
||||||
return []
|
|
||||||
|
|
||||||
photos = search_results['_data']
|
|
||||||
|
|
||||||
for photo in photos:
|
|
||||||
|
|
||||||
# In paged configuration, the first pages' photos
|
|
||||||
# are represented by a None object
|
|
||||||
if photo is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
img_src = None
|
|
||||||
# From the biggest to the lowest format
|
|
||||||
for image_size in image_sizes:
|
|
||||||
if image_size in photo['sizes']:
|
|
||||||
img_src = photo['sizes'][image_size]['url']
|
|
||||||
break
|
|
||||||
|
|
||||||
if not img_src:
|
|
||||||
logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
|
|
||||||
continue
|
|
||||||
|
|
||||||
if 'ownerNsid' not in photo:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# For a bigger thumbnail, keep only the url_z, not the url_n
|
|
||||||
if 'n' in photo['sizes']:
|
|
||||||
thumbnail_src = photo['sizes']['n']['url']
|
|
||||||
elif 'z' in photo['sizes']:
|
|
||||||
thumbnail_src = photo['sizes']['z']['url']
|
|
||||||
else:
|
|
||||||
thumbnail_src = img_src
|
|
||||||
|
|
||||||
url = build_flickr_url(photo['ownerNsid'], photo['id'])
|
|
||||||
|
|
||||||
title = photo.get('title', '')
|
|
||||||
|
|
||||||
content = '<span class="photo-author">' +\
|
|
||||||
photo['username'] +\
|
|
||||||
'</span><br />'
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'img_src': img_src,
|
|
||||||
'thumbnail_src': thumbnail_src,
|
|
||||||
'content': content,
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,44 +0,0 @@
|
||||||
"""
|
|
||||||
Frinkiac (Images)
|
|
||||||
|
|
||||||
@website https://www.frinkiac.com
|
|
||||||
@provide-api no
|
|
||||||
@using-api no
|
|
||||||
@results JSON
|
|
||||||
@stable no
|
|
||||||
@parse url, title, img_src
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
categories = ['images']
|
|
||||||
|
|
||||||
BASE = 'https://frinkiac.com/'
|
|
||||||
SEARCH_URL = '{base}api/search?{query}'
|
|
||||||
RESULT_URL = '{base}?{query}'
|
|
||||||
THUMB_URL = '{base}img/{episode}/{timestamp}/medium.jpg'
|
|
||||||
IMAGE_URL = '{base}img/{episode}/{timestamp}.jpg'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = SEARCH_URL.format(base=BASE, query=urlencode({'q': query}))
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
response_data = loads(resp.text)
|
|
||||||
for result in response_data:
|
|
||||||
episode = result['Episode']
|
|
||||||
timestamp = result['Timestamp']
|
|
||||||
|
|
||||||
results.append({'template': 'images.html',
|
|
||||||
'url': RESULT_URL.format(base=BASE,
|
|
||||||
query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})),
|
|
||||||
'title': episode,
|
|
||||||
'content': '',
|
|
||||||
'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp),
|
|
||||||
'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,62 +0,0 @@
|
||||||
"""
|
|
||||||
General Files (Files)
|
|
||||||
|
|
||||||
@website http://www.general-files.org
|
|
||||||
@provide-api no (nothing found)
|
|
||||||
|
|
||||||
@using-api no (because nothing found)
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
|
|
||||||
@todo detect torrents?
|
|
||||||
"""
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'http://www.general-file.com'
|
|
||||||
search_url = base_url + '/files-{letter}/{query}/{pageno}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
result_xpath = '//table[@class="block-file"]'
|
|
||||||
title_xpath = './/h2/a//text()'
|
|
||||||
url_xpath = './/h2/a/@href'
|
|
||||||
content_xpath = './/p//text()'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
|
|
||||||
params['url'] = search_url.format(query=query,
|
|
||||||
letter=query[0],
|
|
||||||
pageno=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(result_xpath):
|
|
||||||
url = result.xpath(url_xpath)[0]
|
|
||||||
|
|
||||||
# skip fast download links
|
|
||||||
if not url.startswith('/'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': base_url + url,
|
|
||||||
'title': ''.join(result.xpath(title_xpath)),
|
|
||||||
'content': ''.join(result.xpath(content_xpath))})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,85 +0,0 @@
|
||||||
"""
|
|
||||||
Gigablast (Web)
|
|
||||||
|
|
||||||
@website https://gigablast.com
|
|
||||||
@provide-api yes (https://gigablast.com/api.html)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results XML
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from json import loads
|
|
||||||
from random import randint
|
|
||||||
from time import time
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
number_of_results = 10
|
|
||||||
language_support = True
|
|
||||||
safesearch = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://gigablast.com/'
|
|
||||||
search_string = 'search?{query}'\
|
|
||||||
'&n={number_of_results}'\
|
|
||||||
'&c=main'\
|
|
||||||
'&s={offset}'\
|
|
||||||
'&format=json'\
|
|
||||||
'&qh=0'\
|
|
||||||
'&qlang={lang}'\
|
|
||||||
'&ff={safesearch}'\
|
|
||||||
'&rxikd={rxikd}' # random number - 9 digits
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//response//result'
|
|
||||||
url_xpath = './/url'
|
|
||||||
title_xpath = './/title'
|
|
||||||
content_xpath = './/sum'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * number_of_results
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'xx'
|
|
||||||
else:
|
|
||||||
language = params['language'][0:2]
|
|
||||||
|
|
||||||
if params['safesearch'] >= 1:
|
|
||||||
safesearch = 1
|
|
||||||
else:
|
|
||||||
safesearch = 0
|
|
||||||
|
|
||||||
search_path = search_string.format(query=urlencode({'q': query}),
|
|
||||||
offset=offset,
|
|
||||||
number_of_results=number_of_results,
|
|
||||||
rxikd=str(time())[:9],
|
|
||||||
lang=language,
|
|
||||||
safesearch=safesearch)
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
response_json = loads(resp.text)
|
|
||||||
|
|
||||||
for result in response_json['results']:
|
|
||||||
# append result
|
|
||||||
results.append({'url': result['url'],
|
|
||||||
'title': escape(result['title']),
|
|
||||||
'content': escape(result['sum'])})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,61 +0,0 @@
|
||||||
"""
|
|
||||||
Github (It)
|
|
||||||
|
|
||||||
@website https://github.com/
|
|
||||||
@provide-api yes (https://developer.github.com/v3/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes (using api)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from cgi import escape
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it']
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa
|
|
||||||
|
|
||||||
accept_header = 'application/vnd.github.preview.text-match+json'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}))
|
|
||||||
|
|
||||||
params['headers']['Accept'] = accept_header
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# check if items are recieved
|
|
||||||
if 'items' not in search_res:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for res in search_res['items']:
|
|
||||||
title = res['name']
|
|
||||||
url = res['html_url']
|
|
||||||
|
|
||||||
if res['description']:
|
|
||||||
content = escape(res['description'][:500])
|
|
||||||
else:
|
|
||||||
content = ''
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,349 +0,0 @@
|
||||||
# Google (Web)
|
|
||||||
#
|
|
||||||
# @website https://www.google.com
|
|
||||||
# @provide-api yes (https://developers.google.com/custom-search/)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results HTML
|
|
||||||
# @stable no (HTML can change)
|
|
||||||
# @parse url, title, content, suggestion
|
|
||||||
|
|
||||||
import re
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, parse_qsl
|
|
||||||
from lxml import html, etree
|
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
|
||||||
from searx.search import logger
|
|
||||||
|
|
||||||
logger = logger.getChild('google engine')
|
|
||||||
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
use_locale_domain = True
|
|
||||||
|
|
||||||
# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
|
|
||||||
default_hostname = 'www.google.com'
|
|
||||||
|
|
||||||
country_to_hostname = {
|
|
||||||
'BG': 'www.google.bg', # Bulgaria
|
|
||||||
'CZ': 'www.google.cz', # Czech Republic
|
|
||||||
'DE': 'www.google.de', # Germany
|
|
||||||
'DK': 'www.google.dk', # Denmark
|
|
||||||
'AT': 'www.google.at', # Austria
|
|
||||||
'CH': 'www.google.ch', # Switzerland
|
|
||||||
'GR': 'www.google.gr', # Greece
|
|
||||||
'AU': 'www.google.com.au', # Australia
|
|
||||||
'CA': 'www.google.ca', # Canada
|
|
||||||
'GB': 'www.google.co.uk', # United Kingdom
|
|
||||||
'ID': 'www.google.co.id', # Indonesia
|
|
||||||
'IE': 'www.google.ie', # Ireland
|
|
||||||
'IN': 'www.google.co.in', # India
|
|
||||||
'MY': 'www.google.com.my', # Malaysia
|
|
||||||
'NZ': 'www.google.co.nz', # New Zealand
|
|
||||||
'PH': 'www.google.com.ph', # Philippines
|
|
||||||
'SG': 'www.google.com.sg', # Singapore
|
|
||||||
# 'US': 'www.google.us', # United States, redirect to .com
|
|
||||||
'ZA': 'www.google.co.za', # South Africa
|
|
||||||
'AR': 'www.google.com.ar', # Argentina
|
|
||||||
'CL': 'www.google.cl', # Chile
|
|
||||||
'ES': 'www.google.es', # Spain
|
|
||||||
'MX': 'www.google.com.mx', # Mexico
|
|
||||||
'EE': 'www.google.ee', # Estonia
|
|
||||||
'FI': 'www.google.fi', # Finland
|
|
||||||
'BE': 'www.google.be', # Belgium
|
|
||||||
'FR': 'www.google.fr', # France
|
|
||||||
'IL': 'www.google.co.il', # Israel
|
|
||||||
'HR': 'www.google.hr', # Croatia
|
|
||||||
'HU': 'www.google.hu', # Hungary
|
|
||||||
'IT': 'www.google.it', # Italy
|
|
||||||
'JP': 'www.google.co.jp', # Japan
|
|
||||||
'KR': 'www.google.co.kr', # South Korea
|
|
||||||
'LT': 'www.google.lt', # Lithuania
|
|
||||||
'LV': 'www.google.lv', # Latvia
|
|
||||||
'NO': 'www.google.no', # Norway
|
|
||||||
'NL': 'www.google.nl', # Netherlands
|
|
||||||
'PL': 'www.google.pl', # Poland
|
|
||||||
'BR': 'www.google.com.br', # Brazil
|
|
||||||
'PT': 'www.google.pt', # Portugal
|
|
||||||
'RO': 'www.google.ro', # Romania
|
|
||||||
'RU': 'www.google.ru', # Russia
|
|
||||||
'SK': 'www.google.sk', # Slovakia
|
|
||||||
'SL': 'www.google.si', # Slovenia (SL -> si)
|
|
||||||
'SE': 'www.google.se', # Sweden
|
|
||||||
'TH': 'www.google.co.th', # Thailand
|
|
||||||
'TR': 'www.google.com.tr', # Turkey
|
|
||||||
'UA': 'www.google.com.ua', # Ukraine
|
|
||||||
# 'CN': 'www.google.cn', # China, only from China ?
|
|
||||||
'HK': 'www.google.com.hk', # Hong Kong
|
|
||||||
'TW': 'www.google.com.tw' # Taiwan
|
|
||||||
}
|
|
||||||
|
|
||||||
# osm
|
|
||||||
url_map = 'https://www.openstreetmap.org/'\
|
|
||||||
+ '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
search_path = '/search'
|
|
||||||
search_url = ('https://{hostname}' +
|
|
||||||
search_path +
|
|
||||||
'?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x')
|
|
||||||
|
|
||||||
# other URLs
|
|
||||||
map_hostname_start = 'maps.google.'
|
|
||||||
maps_path = '/maps'
|
|
||||||
redirect_path = '/url'
|
|
||||||
images_path = '/images'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//div[@class="g"]'
|
|
||||||
url_xpath = './/h3/a/@href'
|
|
||||||
title_xpath = './/h3'
|
|
||||||
content_xpath = './/span[@class="st"]'
|
|
||||||
content_misc_xpath = './/div[@class="f slp"]'
|
|
||||||
suggestion_xpath = '//p[@class="_Bmc"]'
|
|
||||||
|
|
||||||
# map : detail location
|
|
||||||
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
|
|
||||||
map_phone_xpath = './/div[@class="s"]//table//td[2]/span/span'
|
|
||||||
map_website_url_xpath = 'h3[2]/a/@href'
|
|
||||||
map_website_title_xpath = 'h3[2]'
|
|
||||||
|
|
||||||
# map : near the location
|
|
||||||
map_near = 'table[@class="ts"]//tr'
|
|
||||||
map_near_title = './/h4'
|
|
||||||
map_near_url = './/h4/a/@href'
|
|
||||||
map_near_phone = './/span[@class="nobr"]'
|
|
||||||
|
|
||||||
# images
|
|
||||||
images_xpath = './/div/a'
|
|
||||||
image_url_xpath = './@href'
|
|
||||||
image_img_src_xpath = './img/@src'
|
|
||||||
|
|
||||||
# property names
|
|
||||||
# FIXME : no translation
|
|
||||||
property_address = "Address"
|
|
||||||
property_phone = "Phone number"
|
|
||||||
|
|
||||||
|
|
||||||
# remove google-specific tracking-url
|
|
||||||
def parse_url(url_string, google_hostname):
|
|
||||||
# sanity check
|
|
||||||
if url_string is None:
|
|
||||||
return url_string
|
|
||||||
|
|
||||||
# normal case
|
|
||||||
parsed_url = urlparse(url_string)
|
|
||||||
if (parsed_url.netloc in [google_hostname, '']
|
|
||||||
and parsed_url.path == redirect_path):
|
|
||||||
query = dict(parse_qsl(parsed_url.query))
|
|
||||||
return query['q']
|
|
||||||
else:
|
|
||||||
return url_string
|
|
||||||
|
|
||||||
|
|
||||||
# returns extract_text on the first result selected by the xpath or None
|
|
||||||
def extract_text_from_dom(result, xpath):
|
|
||||||
r = result.xpath(xpath)
|
|
||||||
if len(r) > 0:
|
|
||||||
return escape(extract_text(r[0]))
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
|
||||||
|
|
||||||
if params['language'] == 'all':
|
|
||||||
language = 'en'
|
|
||||||
country = 'US'
|
|
||||||
url_lang = ''
|
|
||||||
else:
|
|
||||||
language_array = params['language'].lower().split('_')
|
|
||||||
if len(language_array) == 2:
|
|
||||||
country = language_array[1]
|
|
||||||
else:
|
|
||||||
country = 'US'
|
|
||||||
language = language_array[0] + ',' + language_array[0] + '-' + country
|
|
||||||
url_lang = 'lang_' + language_array[0]
|
|
||||||
|
|
||||||
if use_locale_domain:
|
|
||||||
google_hostname = country_to_hostname.get(country.upper(), default_hostname)
|
|
||||||
else:
|
|
||||||
google_hostname = default_hostname
|
|
||||||
|
|
||||||
params['url'] = search_url.format(offset=offset,
|
|
||||||
query=urlencode({'q': query}),
|
|
||||||
hostname=google_hostname,
|
|
||||||
lang=url_lang)
|
|
||||||
|
|
||||||
params['headers']['Accept-Language'] = language
|
|
||||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
|
||||||
|
|
||||||
params['google_hostname'] = google_hostname
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse a Google results page into searx result dicts.

    Raises RuntimeWarning when Google redirected to its captcha/"sorry"
    page.  Results pointing back at the Google hostname itself are skipped;
    suggestion entries are appended at the end.
    """
    results = []

    # detect google sorry (captcha / abuse redirect)
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    # which hostname ? (stored by request() in the search params)
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
            # NOTE(review): urlparse's second positional argument is the
            # default *scheme*; passing the hostname here looks wrong — confirm.
            parsed_url = urlparse(url, google_hostname)

            # map result
            if parsed_url.netloc == google_hostname:
                # TODO fix inside links
                continue
            # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
            #     print "yooooo"*30
            #     x = result.xpath(map_near)
            #     if len(x) > 0:
            #         # map : near the location
            #         results = results + parse_map_near(parsed_url, x, google_hostname)
            #     else:
            #         # map : detail about a location
            #         results = results + parse_map_detail(parsed_url, result, google_hostname)
            # # google news
            # elif parsed_url.path == search_path:
            #     # skipping news results
            #     pass

            # # images result
            # elif parsed_url.path == images_path:
            #     # only thumbnail image provided,
            #     # so skipping image results
            #     # results = results + parse_images(result, google_hostname)
            #     pass

            else:
                # normal result
                content = extract_text_from_dom(result, content_xpath)
                if content is None:
                    continue
                content_misc = extract_text_from_dom(result, content_misc_xpath)
                if content_misc is not None:
                    content = content_misc + "<br />" + content
                # append result
                results.append({'url': url,
                                'title': title,
                                'content': content
                                })
        # NOTE(review): bare except also hides unrelated bugs, not only
        # malformed result markup
        except:
            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': escape(extract_text(suggestion))})

    # return results
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def parse_images(result, google_hostname):
    """Extract image entries embedded in a result node as image results."""
    images = []
    for image in result.xpath(images_xpath):
        target_url = parse_url(extract_text(image.xpath(image_url_xpath)[0]), google_hostname)
        thumbnail = extract_text(image.xpath(image_img_src_xpath)[0])

        # append result
        images.append({'url': target_url,
                       'title': '',
                       'content': '',
                       'img_src': thumbnail,
                       'template': 'images.html'
                       })

    return images
|
|
||||||
|
|
||||||
|
|
||||||
def parse_map_near(parsed_url, x, google_hostname):
    """Parse "near a location" map result nodes into searx results.

    *x* is a list of DOM nodes matched by the ``map_near`` xpath.
    """
    results = []

    for result in x:
        title = extract_text_from_dom(result, map_near_title)
        url = parse_url(extract_text_from_dom(result, map_near_url), google_hostname)
        attributes = []
        phone = extract_text_from_dom(result, map_near_phone)
        # phone may be None when the xpath matches nothing; building the
        # 'tel:' URL unconditionally raised TypeError in that case
        if phone is not None:
            add_attributes(attributes, property_phone, phone, 'tel:' + phone)
        results.append({'title': title,
                        'url': url,
                        'content': attributes_to_html(attributes)
                        })

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def parse_map_detail(parsed_url, result, google_hostname):
    """Parse a "detail about a location" map result into searx results.

    Returns a one-element list when the node carries a website link,
    otherwise an empty list.
    """
    results = []

    # try to parse the geoloc from the URL path, falling back to the query string
    m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
    if m is None:
        m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)

    lat = lon = zoom = None  # noqa
    if m is not None:
        # geoloc found (zoom is ignored)
        lon = float(m.group(2))  # noqa
        lat = float(m.group(1))  # noqa
        zoom = int(m.group(3))  # noqa

    # attributes
    attributes = []
    address = extract_text_from_dom(result, map_address_xpath)
    phone = extract_text_from_dom(result, map_phone_xpath)
    # only build the geo: link when a geolocation was actually found;
    # previously lat/lon were unbound (NameError) when neither regex matched
    if lat is not None and lon is not None:
        add_attributes(attributes, property_address, address, 'geo:' + str(lat) + ',' + str(lon))
    # phone may be None when the xpath matches nothing
    if phone is not None:
        add_attributes(attributes, property_phone, phone, 'tel:' + phone)

    # title / content / url
    website_title = extract_text_from_dom(result, map_website_title_xpath)
    content = extract_text_from_dom(result, content_xpath)
    website_url = parse_url(extract_text_from_dom(result, map_website_url_xpath), google_hostname)

    # add a result if there is a website
    if website_url is not None:
        results.append({'title': website_title,
                        'content': (content + '<br />' if content is not None else '')
                        + attributes_to_html(attributes),
                        'url': website_url
                        })

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def add_attributes(attributes, name, value, url):
    """Append a {label, value, url} row to *attributes* unless *value* is missing or empty."""
    if value is None or len(value) == 0:
        return
    attributes.append({'label': name, 'value': value, 'url': url})
|
|
||||||
|
|
||||||
|
|
||||||
def attributes_to_html(attributes):
    """Render attribute rows as a bootstrap-striped HTML table string."""
    rows = []
    for attribute in attributes:
        value = attribute.get('value')
        if 'url' in attribute:
            # turn the value into a link when a target URL is present
            value = '<a href="' + attribute.get('url') + '">' + value + '</a>'
        rows.append('<tr><th>' + attribute.get('label') + '</th><td>' + value + '</td></tr>')
    return '<table class="table table-striped">' + ''.join(rows) + '</table>'
|
|
|
@ -1,69 +0,0 @@
|
||||||
"""
|
|
||||||
Google (Images)
|
|
||||||
|
|
||||||
@website https://www.google.com
|
|
||||||
@provide-api yes (https://developers.google.com/custom-search/)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML chunks with JSON inside
|
|
||||||
@stable no
|
|
||||||
@parse url, title, img_src
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import parse_qs
|
|
||||||
from json import loads
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
safesearch = True
|
|
||||||
|
|
||||||
search_url = 'https://www.google.com/search'\
|
|
||||||
'?{query}'\
|
|
||||||
'&tbm=isch'\
|
|
||||||
'&ijn=1'\
|
|
||||||
'&start={offset}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the Google Images search URL.

    Appends ``safe=active`` when the user enabled safesearch.
    """
    offset = (params['pageno'] - 1) * 100

    # the search_url template only contains {query} and {offset}; the former
    # safesearch= format argument was never substituted and has been removed
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset)

    if safesearch and params['safesearch']:
        params['url'] += '&' + urlencode({'safe': 'active'})

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse Google Images HTML (JSON metadata chunks) into image results."""
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('//div[@data-ved]'):
        metadata = loads(result.xpath('./div[@class="rg_meta"]/text()')[0])

        # force https for the thumbnail
        thumbnail_src = metadata['tu'].replace("http://", "https://")

        # append result
        results.append({'url': metadata['ru'],
                        'title': metadata['pt'],
                        'content': metadata['s'],
                        'thumbnail_src': thumbnail_src,
                        'img_src': metadata['ou'],
                        'template': 'images.html'})

    # return results
    return results
|
|
|
@ -1,67 +0,0 @@
|
||||||
"""
|
|
||||||
Google (News)
|
|
||||||
|
|
||||||
@website https://www.google.com
|
|
||||||
@provide-api yes (https://developers.google.com/web-search/docs/),
|
|
||||||
deprecated!
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes (but deprecated)
|
|
||||||
@parse url, title, content, publishedDate
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
categories = ['news']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
url = 'https://ajax.googleapis.com/'
|
|
||||||
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the (deprecated) Google News AJAX API request URL."""
    offset = (params['pageno'] - 1) * 8

    if params['language'] == 'all':
        language = 'en-US'
    else:
        language = params['language'].replace('_', '-')

    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      lang=language)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse the Google News JSON answer into searx results."""
    results = []

    search_res = loads(resp.text)

    # return empty array if there are no results
    if not search_res.get('responseData', {}).get('results'):
        return []

    # parse results
    for result in search_res['responseData']['results']:
        # skip entries without a URL before doing any further work; the
        # original parsed publishedDate first, wasting a parse (and risking
        # a KeyError) on entries that were about to be skipped anyway
        if 'url' not in result:
            continue

        # parse publishedDate
        publishedDate = parser.parse(result['publishedDate'])

        # append result
        results.append({'url': result['unescapedUrl'],
                        'title': result['titleNoFormatting'],
                        'publishedDate': publishedDate,
                        'content': result['content']})

    # return results
    return results
|
|
|
@ -1,87 +0,0 @@
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from collections import Iterable
|
|
||||||
|
|
||||||
search_url = None
|
|
||||||
url_query = None
|
|
||||||
content_query = None
|
|
||||||
title_query = None
|
|
||||||
# suggestion_xpath = ''
|
|
||||||
|
|
||||||
|
|
||||||
def iterate(iterable):
    """Yield (str(key_or_index), value) pairs for a dict or a sequence.

    Keys and indices are stringified so both container kinds can be walked
    uniformly by ``do_query``.
    """
    if type(iterable) == dict:
        # .items() iterates the same pairs as the py2-only .iteritems()
        # and also works on Python 3
        it = iterable.items()
    else:
        it = enumerate(iterable)
    for index, value in it:
        yield str(index), value
|
|
||||||
|
|
||||||
|
|
||||||
def is_iterable(obj):
    """Return True for iterable containers, excluding plain strings."""
    # short-circuit keeps the original check order: plain str is rejected
    # before the (py2) unicode type is ever looked up
    if type(obj) == str or type(obj) == unicode:
        return False
    return isinstance(obj, Iterable)
|
|
||||||
|
|
||||||
|
|
||||||
def parse(query):
    """Split a '/'-separated query path into its non-empty segments."""
    return [segment for segment in query.split('/') if segment != '']
|
|
||||||
|
|
||||||
|
|
||||||
def do_query(data, q):
    """Recursively collect every value reachable under the key path *q*.

    *q* is a list of path components (as produced by ``parse``).  The search
    walks nested dicts/lists via ``iterate`` and a path component may match
    a key at any depth, so partial matches keep descending.
    Returns a flat list of matching values.
    """
    ret = []
    if not q:
        return ret

    qkey = q[0]

    for key, value in iterate(data):
        # last path component: collect direct matches, keep searching deeper
        if len(q) == 1:
            if key == qkey:
                ret.append(value)
            elif is_iterable(value):
                ret.extend(do_query(value, q))
        else:
            # scalar values cannot contain the rest of the path
            if not is_iterable(value):
                continue
            if key == qkey:
                # matched this component: resolve the remaining path below it
                ret.extend(do_query(value, q[1:]))
            else:
                # not matched here: the full path may still begin deeper down
                ret.extend(do_query(value, q))
    return ret
|
|
||||||
|
|
||||||
|
|
||||||
def query(data, query_string):
    """Look up *query_string* (a '/'-separated key path) inside *data*."""
    return do_query(data, parse(query_string))
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the JSON engine URL; the leading 'q=' of the encoded pair is stripped."""
    encoded_query = urlencode({'q': query})[2:]
    params['url'] = search_url.format(query=encoded_query)
    params['query'] = encoded_query
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Map the configured JSON key-path queries onto searx result dicts."""
    json = loads(resp.text)

    urls = query(json, url_query)
    contents = query(json, content_query)
    titles = query(json, title_query)

    return [{'url': u, 'title': t, 'content': c}
            for u, t, c in zip(urls, titles, contents)]
|
|
|
@ -1,118 +0,0 @@
|
||||||
"""
|
|
||||||
Kickass Torrent (Videos, Music, Files)
|
|
||||||
|
|
||||||
@website https://kickass.so
|
|
||||||
@provide-api no (nothing found)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable yes (HTML can change)
|
|
||||||
@parse url, title, content, seed, leech, magnetlink
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
|
||||||
from operator import itemgetter
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos', 'music', 'files']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://kickass.to/'
|
|
||||||
search_url = url + 'search/{search_term}/{pageno}/'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
magnet_xpath = './/a[@title="Torrent magnet link"]'
|
|
||||||
torrent_xpath = './/a[@title="Download torrent file"]'
|
|
||||||
content_xpath = './/span[@class="font11px lightgrey block"]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the Kickass search URL for the requested page."""
    search_term = quote(query)
    params['url'] = search_url.format(search_term=search_term,
                                      pageno=params['pageno'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse the Kickass HTML result table into searx torrent results.

    Returns the results sorted by seeder count, descending.
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//table[@class="data"]//tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # multipliers used to convert the displayed file size to bytes
    filesize_multipliers = {'TB': 1024 ** 4,
                            'GB': 1024 ** 3,
                            'MB': 1024 ** 2,
                            'KB': 1024}

    # parse results (first row is the table header)
    for result in search_res[1:]:
        link = result.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, link.attrib['href'])
        title = extract_text(link)
        content = escape(extract_text(result.xpath(content_xpath)))
        seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
        leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
        filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
        filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
        files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]

        # convert seed/leech to int if possible, defaulting to 0
        seed = int(seed) if seed.isdigit() else 0
        leech = int(leech) if leech.isdigit() else 0

        # convert filesize to bytes if possible; was a bare 'except:' that
        # also hid unrelated bugs, and an if/elif multiplier chain that left
        # unknown suffixes as a float instead of an int
        try:
            filesize = int(float(filesize) * filesize_multipliers.get(filesize_multiplier, 1))
        except ValueError:
            filesize = None

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath(magnet_xpath)[0].attrib['href']

        torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
        torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'torrentfile': torrentfileurl,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
|
|
|
@ -1,88 +0,0 @@
|
||||||
"""
|
|
||||||
general mediawiki-engine (Web)
|
|
||||||
|
|
||||||
@website websites built on mediawiki (https://www.mediawiki.org)
|
|
||||||
@provide-api yes (http://www.mediawiki.org/wiki/API:Search)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title
|
|
||||||
|
|
||||||
@todo content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from string import Formatter
|
|
||||||
from urllib import urlencode, quote
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
language_support = True
|
|
||||||
paging = True
|
|
||||||
number_of_results = 1
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://{language}.wikipedia.org/'
|
|
||||||
search_postfix = 'w/api.php?action=query'\
|
|
||||||
'&list=search'\
|
|
||||||
'&{query}'\
|
|
||||||
'&format=json'\
|
|
||||||
'&sroffset={offset}'\
|
|
||||||
'&srlimit={limit}'\
|
|
||||||
'&srwhat=nearmatch' # search for a near match in the title
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build a MediaWiki API search request.

    The ``{language}`` field is substituted into ``base_url`` only when the
    configured template actually contains it (checked via string.Formatter),
    and the resolved language is written back into *params* because
    ``response`` needs it to build result URLs.
    """
    offset = (params['pageno'] - 1) * number_of_results

    string_args = dict(query=urlencode({'srsearch': query}),
                       offset=offset,
                       limit=number_of_results)

    # introspect which replacement fields the base_url template declares
    format_strings = list(Formatter().parse(base_url))

    if params['language'] == 'all':
        language = 'en'
    else:
        language = params['language'].split('_')[0]

    # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
    if any(x[1] == 'language' for x in format_strings):
        string_args['language'] = language

    # write search-language back to params, required in response
    params['language'] = language

    search_url = base_url + search_postfix

    params['url'] = search_url.format(**string_args)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Turn the MediaWiki API JSON answer into searx results."""
    search_results = loads(resp.text)

    # return empty array if there are no results
    if not search_results.get('query', {}).get('search'):
        return []

    results = []
    # parse results
    for result in search_results['query']['search']:
        # redirect stubs carry no usable content
        if result.get('snippet', '').startswith('#REDIRECT'):
            continue

        page = quote(result['title'].replace(' ', '_').encode('utf-8'))
        page_url = base_url.format(language=resp.search_params['language']) +\
            'wiki/' + page

        # append result
        results.append({'url': page_url,
                        'title': result['title'],
                        'content': ''})

    # return results
    return results
|
|
|
@ -1,61 +0,0 @@
|
||||||
"""
|
|
||||||
Mixcloud (Music)
|
|
||||||
|
|
||||||
@website https://http://www.mixcloud.com/
|
|
||||||
@provide-api yes (http://www.mixcloud.com/developers/
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, embedded, publishedDate
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.mixcloud.com/'
|
|
||||||
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
|
|
||||||
|
|
||||||
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
|
|
||||||
'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the Mixcloud API search URL for the requested page."""
    page_offset = (params['pageno'] - 1) * 10

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=page_offset)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse the Mixcloud API JSON answer into searx results."""
    search_res = loads(resp.text)

    results = []
    # parse results
    for result in search_res.get('data', []):
        cloudcast_url = result['url']

        # append result
        results.append({'url': cloudcast_url,
                        'title': result['name'],
                        'embedded': embedded_url.format(url=cloudcast_url),
                        'publishedDate': parser.parse(result['created_time']),
                        'content': result['user']['name']})

    # return results
    return results
|
|
|
@ -1,119 +0,0 @@
|
||||||
"""
|
|
||||||
Nyaa.se (Anime Bittorrent tracker)
|
|
||||||
|
|
||||||
@website http://www.nyaa.se/
|
|
||||||
@provide-api no
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content, seed, leech, torrentfile
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files', 'images', 'videos', 'music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'http://www.nyaa.se/'
|
|
||||||
search_url = base_url + '?page=search&{query}&offset={offset}'
|
|
||||||
|
|
||||||
# xpath queries
|
|
||||||
xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]'
|
|
||||||
xpath_category = './/td[@class="tlisticon"]/a'
|
|
||||||
xpath_title = './/td[@class="tlistname"]/a'
|
|
||||||
xpath_torrent_file = './/td[@class="tlistdownload"]/a'
|
|
||||||
xpath_filesize = './/td[@class="tlistsize"]/text()'
|
|
||||||
xpath_seeds = './/td[@class="tlistsn"]/text()'
|
|
||||||
xpath_leeches = './/td[@class="tlistln"]/text()'
|
|
||||||
xpath_downloads = './/td[@class="tlistdn"]/text()'
|
|
||||||
|
|
||||||
|
|
||||||
# convert a variable to integer or return 0 if it's not a number
def int_or_zero(num):
    """Coerce a digit string (or a single-item list of them) to int; 0 otherwise."""
    if isinstance(num, list):
        if not num:
            return 0
        num = num[0]
    return int(num) if num.isdigit() else 0
|
|
||||||
|
|
||||||
|
|
||||||
# get multiplier to convert torrent size to bytes
def get_filesize_mul(suffix):
    """Map a size suffix ('KB', 'MiB', ...) to its byte multiplier.

    Raises KeyError for unknown suffixes; callers catch this.
    """
    multipliers = {
        'KB': 1024,
        'MB': 1024 ** 2,
        'GB': 1024 ** 3,
        'TB': 1024 ** 4,

        'KIB': 1024,
        'MIB': 1024 ** 2,
        'GIB': 1024 ** 3,
        'TIB': 1024 ** 4,
    }
    return multipliers[str(suffix).upper()]
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the nyaa.se search URL ('offset' is actually the page number)."""
    params['url'] = search_url.format(query=urlencode({'term': query}),
                                      offset=params['pageno'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse the nyaa.se HTML result table into searx torrent results."""
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # category in which our torrent belongs
        category = result.xpath(xpath_category)[0].attrib.get('title')

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = escape(extract_text(page_a))

        # link to the page
        href = page_a.attrib.get('href')

        # link to the torrent file
        torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')

        # torrent size; cells look like "123.4 MiB" but may be missing or odd.
        # narrowed from 'except Exception as e' (the binding was unused and
        # the broad clause hid unrelated bugs): split -> ValueError,
        # missing cell -> IndexError, unknown suffix -> KeyError
        try:
            file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
            file_size = int(float(file_size) * get_filesize_mul(suffix))
        except (ValueError, IndexError, KeyError):
            file_size = None

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))

        # leech count
        leech = int_or_zero(result.xpath(xpath_leeches))

        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)
        content = escape(content)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': file_size,
                        'torrentfile': torrent_link,
                        'template': 'torrent.html'})

    return results
|
|
|
@ -1,99 +0,0 @@
|
||||||
"""
|
|
||||||
OpenStreetMap (Map)
|
|
||||||
|
|
||||||
@website https://openstreetmap.org/
|
|
||||||
@provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['map']
|
|
||||||
paging = False
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://nominatim.openstreetmap.org/'
|
|
||||||
search_string = 'search/{query}?format=json&polygon_geojson=1&addressdetails=1'
|
|
||||||
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the Nominatim search request.

    NOTE(review): *query* is interpolated into the URL path without any
    percent-encoding — queries containing '/', '?' or '#' presumably produce
    a malformed URL; verify against the HTTP client's escaping behaviour.
    """
    params['url'] = base_url + search_string.format(query=query)

    # using searx User-Agent (Nominatim's usage policy requires one)
    params['headers']['User-Agent'] = searx_useragent()

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Parse the Nominatim JSON answer into searx map results."""
    results = []
    json = loads(resp.text)

    # parse results
    for r in json:
        if 'display_name' not in r:
            continue

        title = r['display_name']
        osm_type = r.get('osm_type', r.get('type'))
        url = result_base_url.format(osm_type=osm_type,
                                     osm_id=r['osm_id'])

        osm = {'type': osm_type,
               'id': r['osm_id']}

        geojson = r.get('geojson')

        # if no geojson is found and osm_type is a node, add geojson Point
        if not geojson and osm_type == 'node':
            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}

        address_raw = r.get('address')
        address = {}

        # get name
        # NOTE(review): address_raw may be None when Nominatim returns no
        # 'address' block; the .get() calls below would then raise.
        # Presumably addressdetails=1 in the request guarantees it — confirm.
        if r['class'] == 'amenity' or\
           r['class'] == 'shop' or\
           r['class'] == 'tourism' or\
           r['class'] == 'leisure':
            if address_raw.get('address29'):
                address = {'name': address_raw.get('address29')}
            else:
                address = {'name': address_raw.get(r['type'])}

        # add rest of adressdata, if something is already found
        if address.get('name'):
            address.update({'house_number': address_raw.get('house_number'),
                            'road': address_raw.get('road'),
                            'locality': address_raw.get('city',
                                        address_raw.get('town',          # noqa
                                        address_raw.get('village'))),    # noqa
                            'postcode': address_raw.get('postcode'),
                            'country': address_raw.get('country'),
                            'country_code': address_raw.get('country_code')})
        else:
            address = None

        # append result
        results.append({'template': 'map.html',
                        'title': title,
                        'content': '',
                        'longitude': r['lon'],
                        'latitude': r['lat'],
                        'boundingbox': r['boundingbox'],
                        'geojson': geojson,
                        'address': address,
                        'osm': osm,
                        'url': url})

    # return results
    return results
|
|
|
@ -1,131 +0,0 @@
|
||||||
"""
|
|
||||||
Photon (Map)
|
|
||||||
|
|
||||||
@website https://photon.komoot.de
|
|
||||||
@provide-api yes (https://photon.komoot.de/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['map']
|
|
||||||
paging = False
|
|
||||||
language_support = True
|
|
||||||
number_of_results = 10
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://photon.komoot.de/'
|
|
||||||
search_string = 'api/?{query}&limit={limit}'
|
|
||||||
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
|
|
||||||
|
|
||||||
# list of supported languages
|
|
||||||
allowed_languages = ['de', 'en', 'fr', 'it']
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the photon.komoot.de API request, adding a lang filter when supported."""
    search_path = search_string.format(query=urlencode({'q': query}),
                                       limit=number_of_results)
    params['url'] = base_url + search_path

    if params['language'] != 'all':
        language = params['language'].split('_')[0]
        # photon only understands a small set of languages
        if language in allowed_languages:
            params['url'] += "&lang=" + language

    # using searx User-Agent
    params['headers']['User-Agent'] = searx_useragent()

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
json = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for r in json.get('features', {}):
|
|
||||||
|
|
||||||
properties = r.get('properties')
|
|
||||||
|
|
||||||
if not properties:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# get title
|
|
||||||
title = properties.get('name')
|
|
||||||
|
|
||||||
# get osm-type
|
|
||||||
if properties.get('osm_type') == 'N':
|
|
||||||
osm_type = 'node'
|
|
||||||
elif properties.get('osm_type') == 'W':
|
|
||||||
osm_type = 'way'
|
|
||||||
elif properties.get('osm_type') == 'R':
|
|
||||||
osm_type = 'relation'
|
|
||||||
else:
|
|
||||||
# continue if invalide osm-type
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = result_base_url.format(osm_type=osm_type,
|
|
||||||
osm_id=properties.get('osm_id'))
|
|
||||||
|
|
||||||
osm = {'type': osm_type,
|
|
||||||
'id': properties.get('osm_id')}
|
|
||||||
|
|
||||||
geojson = r.get('geometry')
|
|
||||||
|
|
||||||
if properties.get('extent'):
|
|
||||||
boundingbox = [properties.get('extent')[3],
|
|
||||||
properties.get('extent')[1],
|
|
||||||
properties.get('extent')[0],
|
|
||||||
properties.get('extent')[2]]
|
|
||||||
else:
|
|
||||||
# TODO: better boundingbox calculation
|
|
||||||
boundingbox = [geojson['coordinates'][1],
|
|
||||||
geojson['coordinates'][1],
|
|
||||||
geojson['coordinates'][0],
|
|
||||||
geojson['coordinates'][0]]
|
|
||||||
|
|
||||||
# address calculation
|
|
||||||
address = {}
|
|
||||||
|
|
||||||
# get name
|
|
||||||
if properties.get('osm_key') == 'amenity' or\
|
|
||||||
properties.get('osm_key') == 'shop' or\
|
|
||||||
properties.get('osm_key') == 'tourism' or\
|
|
||||||
properties.get('osm_key') == 'leisure':
|
|
||||||
address = {'name': properties.get('name')}
|
|
||||||
|
|
||||||
# add rest of adressdata, if something is already found
|
|
||||||
if address.get('name'):
|
|
||||||
address.update({'house_number': properties.get('housenumber'),
|
|
||||||
'road': properties.get('street'),
|
|
||||||
'locality': properties.get('city',
|
|
||||||
properties.get('town', # noqa
|
|
||||||
properties.get('village'))), # noqa
|
|
||||||
'postcode': properties.get('postcode'),
|
|
||||||
'country': properties.get('country')})
|
|
||||||
else:
|
|
||||||
address = None
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'template': 'map.html',
|
|
||||||
'title': title,
|
|
||||||
'content': '',
|
|
||||||
'longitude': geojson['coordinates'][0],
|
|
||||||
'latitude': geojson['coordinates'][1],
|
|
||||||
'boundingbox': boundingbox,
|
|
||||||
'geojson': geojson,
|
|
||||||
'address': address,
|
|
||||||
'osm': osm,
|
|
||||||
'url': url})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,98 +0,0 @@
|
||||||
# Piratebay (Videos, Music, Files)
|
|
||||||
#
|
|
||||||
# @website https://thepiratebay.se
|
|
||||||
# @provide-api no (nothing found)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results HTML (using search portal)
|
|
||||||
# @stable yes (HTML can change)
|
|
||||||
# @parse url, title, content, seed, leech, magnetlink
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
|
||||||
from operator import itemgetter
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos', 'music', 'files']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://thepiratebay.se/'
|
|
||||||
search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'
|
|
||||||
|
|
||||||
# piratebay specific type-definitions
|
|
||||||
search_types = {'files': '0',
|
|
||||||
'music': '100',
|
|
||||||
'videos': '200'}
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
magnet_xpath = './/a[@title="Download this torrent using magnet"]'
|
|
||||||
torrent_xpath = './/a[@title="Download this torrent"]'
|
|
||||||
content_xpath = './/font[@class="detDesc"]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
search_type = search_types.get(params['category'], '0')
|
|
||||||
|
|
||||||
params['url'] = search_url.format(search_term=quote(query),
|
|
||||||
search_type=search_type,
|
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
search_res = dom.xpath('//table[@id="searchResult"]//tr')
|
|
||||||
|
|
||||||
# return empty array if nothing is found
|
|
||||||
if not search_res:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res[1:]:
|
|
||||||
link = result.xpath('.//div[@class="detName"]//a')[0]
|
|
||||||
href = urljoin(url, link.attrib.get('href'))
|
|
||||||
title = extract_text(link)
|
|
||||||
content = escape(extract_text(result.xpath(content_xpath)))
|
|
||||||
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
|
|
||||||
|
|
||||||
# convert seed to int if possible
|
|
||||||
if seed.isdigit():
|
|
||||||
seed = int(seed)
|
|
||||||
else:
|
|
||||||
seed = 0
|
|
||||||
|
|
||||||
# convert leech to int if possible
|
|
||||||
if leech.isdigit():
|
|
||||||
leech = int(leech)
|
|
||||||
else:
|
|
||||||
leech = 0
|
|
||||||
|
|
||||||
magnetlink = result.xpath(magnet_xpath)[0]
|
|
||||||
torrentfile_links = result.xpath(torrent_xpath)
|
|
||||||
if torrentfile_links:
|
|
||||||
torrentfile_link = torrentfile_links[0].attrib.get('href')
|
|
||||||
else:
|
|
||||||
torrentfile_link = None
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'seed': seed,
|
|
||||||
'leech': leech,
|
|
||||||
'magnetlink': magnetlink.attrib.get('href'),
|
|
||||||
'torrentfile': torrentfile_link,
|
|
||||||
'template': 'torrent.html'})
|
|
||||||
|
|
||||||
# return results sorted by seeder
|
|
||||||
return sorted(results, key=itemgetter('seed'), reverse=True)
|
|
|
@ -1,98 +0,0 @@
|
||||||
"""
|
|
||||||
Qwant (Web, Images, News, Social)
|
|
||||||
|
|
||||||
@website https://qwant.com/
|
|
||||||
@provide-api not officially (https://api.qwant.com/api/search/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = None
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
category_to_keyword = {'general': 'web',
|
|
||||||
'images': 'images',
|
|
||||||
'news': 'news',
|
|
||||||
'social media': 'social'}
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
|
||||||
|
|
||||||
if categories[0] and categories[0] in category_to_keyword:
|
|
||||||
|
|
||||||
params['url'] = url.format(keyword=category_to_keyword[categories[0]],
|
|
||||||
query=urlencode({'q': query}),
|
|
||||||
offset=offset)
|
|
||||||
else:
|
|
||||||
params['url'] = url.format(keyword='web',
|
|
||||||
query=urlencode({'q': query}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
# add language tag if specified
|
|
||||||
if params['language'] != 'all':
|
|
||||||
params['url'] += '&locale=' + params['language'].lower()
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if 'data' not in search_results:
|
|
||||||
return []
|
|
||||||
|
|
||||||
data = search_results.get('data', {})
|
|
||||||
|
|
||||||
res = data.get('result', {})
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in res.get('items', {}):
|
|
||||||
|
|
||||||
title = result['title']
|
|
||||||
res_url = result['url']
|
|
||||||
content = result['desc']
|
|
||||||
|
|
||||||
if category_to_keyword.get(categories[0], '') == 'web':
|
|
||||||
results.append({'title': title,
|
|
||||||
'content': content,
|
|
||||||
'url': res_url})
|
|
||||||
|
|
||||||
elif category_to_keyword.get(categories[0], '') == 'images':
|
|
||||||
thumbnail_src = result['thumbnail']
|
|
||||||
img_src = result['media']
|
|
||||||
results.append({'template': 'images.html',
|
|
||||||
'url': res_url,
|
|
||||||
'title': title,
|
|
||||||
'content': '',
|
|
||||||
'thumbnail_src': thumbnail_src,
|
|
||||||
'img_src': img_src})
|
|
||||||
|
|
||||||
elif (category_to_keyword.get(categories[0], '') == 'news' or
|
|
||||||
category_to_keyword.get(categories[0], '') == 'social'):
|
|
||||||
published_date = datetime.fromtimestamp(result['date'], None)
|
|
||||||
|
|
||||||
results.append({'url': res_url,
|
|
||||||
'title': title,
|
|
||||||
'publishedDate': published_date,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,79 +0,0 @@
|
||||||
"""
|
|
||||||
Reddit
|
|
||||||
|
|
||||||
@website https://www.reddit.com/
|
|
||||||
@provide-api yes (https://www.reddit.com/dev/api)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, thumbnail, publishedDate
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general', 'images', 'news', 'social media']
|
|
||||||
page_size = 25
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.reddit.com/'
|
|
||||||
search_url = base_url + 'search.json?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
query = urlencode({'q': query,
|
|
||||||
'limit': page_size})
|
|
||||||
params['url'] = search_url.format(query=query)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
img_results = []
|
|
||||||
text_results = []
|
|
||||||
|
|
||||||
search_results = json.loads(resp.text)
|
|
||||||
|
|
||||||
# return empty array if there are no results
|
|
||||||
if 'data' not in search_results:
|
|
||||||
return []
|
|
||||||
|
|
||||||
posts = search_results.get('data', {}).get('children', [])
|
|
||||||
|
|
||||||
# process results
|
|
||||||
for post in posts:
|
|
||||||
data = post['data']
|
|
||||||
|
|
||||||
# extract post information
|
|
||||||
params = {
|
|
||||||
'url': urljoin(base_url, data['permalink']),
|
|
||||||
'title': data['title']
|
|
||||||
}
|
|
||||||
|
|
||||||
# if thumbnail field contains a valid URL, we need to change template
|
|
||||||
thumbnail = data['thumbnail']
|
|
||||||
url_info = urlparse(thumbnail)
|
|
||||||
# netloc & path
|
|
||||||
if url_info[1] != '' and url_info[2] != '':
|
|
||||||
params['img_src'] = data['url']
|
|
||||||
params['thumbnail_src'] = thumbnail
|
|
||||||
params['template'] = 'images.html'
|
|
||||||
img_results.append(params)
|
|
||||||
else:
|
|
||||||
created = datetime.fromtimestamp(data['created_utc'])
|
|
||||||
content = escape(data['selftext'])
|
|
||||||
if len(content) > 500:
|
|
||||||
content = content[:500] + '...'
|
|
||||||
params['content'] = content
|
|
||||||
params['publishedDate'] = created
|
|
||||||
text_results.append(params)
|
|
||||||
|
|
||||||
# show images first and text results second
|
|
||||||
return img_results + text_results
|
|
|
@ -1,75 +0,0 @@
|
||||||
"""
|
|
||||||
Searchcode (It)
|
|
||||||
|
|
||||||
@website https://searchcode.com/
|
|
||||||
@provide-api yes (https://searchcode.com/api/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://searchcode.com/'
|
|
||||||
search_url = url + 'api/codesearch_I/?{query}&p={pageno}'
|
|
||||||
|
|
||||||
# special code-endings which are not recognised by the file ending
|
|
||||||
code_endings = {'cs': 'c#',
|
|
||||||
'h': 'c',
|
|
||||||
'hpp': 'cpp',
|
|
||||||
'cxx': 'cpp'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
# Disable SSL verification
|
|
||||||
# error: (60) SSL certificate problem: unable to get local issuer
|
|
||||||
# certificate
|
|
||||||
params['verify'] = False
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_results.get('results', []):
|
|
||||||
href = result['url']
|
|
||||||
title = "" + result['name'] + " - " + result['filename']
|
|
||||||
repo = result['repo']
|
|
||||||
|
|
||||||
lines = dict()
|
|
||||||
for line, code in result['lines'].items():
|
|
||||||
lines[int(line)] = code
|
|
||||||
|
|
||||||
code_language = code_endings.get(
|
|
||||||
result['filename'].split('.')[-1].lower(),
|
|
||||||
result['filename'].split('.')[-1].lower())
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': '',
|
|
||||||
'repository': repo,
|
|
||||||
'codelines': sorted(lines.items()),
|
|
||||||
'code_language': code_language,
|
|
||||||
'template': 'code.html'})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,63 +0,0 @@
|
||||||
"""
|
|
||||||
Searchcode (It)
|
|
||||||
|
|
||||||
@website https://searchcode.com/
|
|
||||||
@provide-api yes (https://searchcode.com/api/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://searchcode.com/'
|
|
||||||
search_url = url + 'api/search_IV/?{query}&p={pageno}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
# Disable SSL verification
|
|
||||||
# error: (60) SSL certificate problem: unable to get local issuer
|
|
||||||
# certificate
|
|
||||||
params['verify'] = False
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_results = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_results.get('results', []):
|
|
||||||
href = result['url']
|
|
||||||
title = "[" + result['type'] + "] " +\
|
|
||||||
result['namespace'] +\
|
|
||||||
" " + result['name']
|
|
||||||
content = '<span class="highlight">[' +\
|
|
||||||
result['type'] + "] " +\
|
|
||||||
result['name'] + " " +\
|
|
||||||
result['synopsis'] +\
|
|
||||||
"</span><br />" +\
|
|
||||||
result['description']
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,98 +0,0 @@
|
||||||
"""
|
|
||||||
Soundcloud (Music)
|
|
||||||
|
|
||||||
@website https://soundcloud.com
|
|
||||||
@provide-api yes (https://developers.soundcloud.com/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, publishedDate, embedded
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from StringIO import StringIO
|
|
||||||
from json import loads
|
|
||||||
from lxml import etree
|
|
||||||
from urllib import urlencode, quote_plus
|
|
||||||
from dateutil import parser
|
|
||||||
from searx import logger
|
|
||||||
from searx.poolrequests import get as http_get
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.soundcloud.com/'
|
|
||||||
search_url = url + 'search?{query}'\
|
|
||||||
'&facet=model'\
|
|
||||||
'&limit=20'\
|
|
||||||
'&offset={offset}'\
|
|
||||||
'&linked_partitioning=1'\
|
|
||||||
'&client_id={client_id}' # noqa
|
|
||||||
|
|
||||||
embedded_url = '<iframe width="100%" height="166" ' +\
|
|
||||||
'scrolling="no" frameborder="no" ' +\
|
|
||||||
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
def get_client_id():
|
|
||||||
response = http_get("https://soundcloud.com")
|
|
||||||
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
|
|
||||||
|
|
||||||
if response.ok:
|
|
||||||
tree = etree.parse(StringIO(response.content), etree.HTMLParser())
|
|
||||||
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
|
|
||||||
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
|
|
||||||
|
|
||||||
# extracts valid app_js urls from soundcloud.com content
|
|
||||||
for app_js_url in app_js_urls:
|
|
||||||
# gets app_js and searches for the clientid
|
|
||||||
response = http_get(app_js_url)
|
|
||||||
if response.ok:
|
|
||||||
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
|
|
||||||
if cids is not None and len(cids.groups()):
|
|
||||||
return cids.groups()[0]
|
|
||||||
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# api-key
|
|
||||||
guest_client_id = get_client_id()
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 20
|
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
offset=offset,
|
|
||||||
client_id=guest_client_id)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res.get('collection', []):
|
|
||||||
if result['kind'] in ('track', 'playlist'):
|
|
||||||
title = result['title']
|
|
||||||
content = result['description']
|
|
||||||
publishedDate = parser.parse(result['last_modified'])
|
|
||||||
uri = quote_plus(result['uri'])
|
|
||||||
embedded = embedded_url.format(uri=uri)
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': result['permalink_url'],
|
|
||||||
'title': title,
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'embedded': embedded,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,62 +0,0 @@
|
||||||
"""
|
|
||||||
Spotify (Music)
|
|
||||||
|
|
||||||
@website https://spotify.com
|
|
||||||
@provide-api yes (https://developer.spotify.com/web-api/search-item/)
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, title, content, embedded
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.spotify.com/'
|
|
||||||
search_url = url + 'v1/search?{query}&type=track&offset={offset}'
|
|
||||||
|
|
||||||
embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{audioid}"\
|
|
||||||
width="300" height="80" frameborder="0" allowtransparency="true"></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 20
|
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_res = loads(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in search_res.get('tracks', {}).get('items', {}):
|
|
||||||
if result['type'] == 'track':
|
|
||||||
title = result['name']
|
|
||||||
url = result['external_urls']['spotify']
|
|
||||||
content = result['artists'][0]['name'] +\
|
|
||||||
" • " +\
|
|
||||||
result['album']['name'] +\
|
|
||||||
" • " + result['name']
|
|
||||||
embedded = embedded_url.format(audioid=result['id'])
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'embedded': embedded,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,60 +0,0 @@
|
||||||
"""
|
|
||||||
Stackoverflow (It)
|
|
||||||
|
|
||||||
@website https://stackoverflow.com/
|
|
||||||
@provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://stackoverflow.com/'
|
|
||||||
search_url = url + 'search?{query}&page={pageno}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//div[contains(@class,"question-summary")]'
|
|
||||||
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
|
|
||||||
content_xpath = './/div[@class="excerpt"]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
|
||||||
pageno=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
link = result.xpath(link_xpath)[0]
|
|
||||||
href = urljoin(url, link.attrib.get('href'))
|
|
||||||
title = escape(extract_text(link))
|
|
||||||
content = escape(extract_text(result.xpath(content_xpath)))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,124 +0,0 @@
|
||||||
# Startpage (Web)
|
|
||||||
#
|
|
||||||
# @website https://startpage.com
|
|
||||||
# @provide-api no (nothing found)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results HTML
|
|
||||||
# @stable no (HTML can change)
|
|
||||||
# @parse url, title, content
|
|
||||||
#
|
|
||||||
# @todo paging
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
from cgi import escape
|
|
||||||
from dateutil import parser
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
import re
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
# there is a mechanism to block "bot" search
|
|
||||||
# (probably the parameter qid), require
|
|
||||||
# storing of qid's between mulitble search-calls
|
|
||||||
|
|
||||||
# paging = False
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://startpage.com/'
|
|
||||||
search_url = base_url + 'do/search'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
|
||||||
# not ads: div[@class="result"] are the direct childs of div[@id="results"]
|
|
||||||
results_xpath = '//div[@class="result"]'
|
|
||||||
link_xpath = './/h3/a'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
|
||||||
|
|
||||||
params['url'] = search_url
|
|
||||||
params['method'] = 'POST'
|
|
||||||
params['data'] = {'query': query,
|
|
||||||
'startat': offset}
|
|
||||||
|
|
||||||
# set language if specified
|
|
||||||
if params['language'] != 'all':
|
|
||||||
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.content)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
links = result.xpath(link_xpath)
|
|
||||||
if not links:
|
|
||||||
continue
|
|
||||||
link = links[0]
|
|
||||||
url = link.attrib.get('href')
|
|
||||||
|
|
||||||
# block google-ad url's
|
|
||||||
if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# block startpage search url's
|
|
||||||
if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# block ixquick search url's
|
|
||||||
if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = escape(extract_text(link))
|
|
||||||
|
|
||||||
if result.xpath('./p[@class="desc clk"]'):
|
|
||||||
content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
|
|
||||||
else:
|
|
||||||
content = ''
|
|
||||||
|
|
||||||
published_date = None
|
|
||||||
|
|
||||||
# check if search result starts with something like: "2 Sep 2014 ... "
|
|
||||||
if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
|
|
||||||
date_pos = content.find('...') + 4
|
|
||||||
date_string = content[0:date_pos - 5]
|
|
||||||
published_date = parser.parse(date_string, dayfirst=True)
|
|
||||||
|
|
||||||
# fix content string
|
|
||||||
content = content[date_pos:]
|
|
||||||
|
|
||||||
# check if search result starts with something like: "5 days ago ... "
|
|
||||||
elif re.match("^[0-9]+ days? ago \.\.\. ", content):
|
|
||||||
date_pos = content.find('...') + 4
|
|
||||||
date_string = content[0:date_pos - 5]
|
|
||||||
|
|
||||||
# calculate datetime
|
|
||||||
published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
|
|
||||||
|
|
||||||
# fix content string
|
|
||||||
content = content[date_pos:]
|
|
||||||
|
|
||||||
if published_date:
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'publishedDate': published_date})
|
|
||||||
else:
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,81 +0,0 @@
|
||||||
"""
|
|
||||||
Subtitleseeker (Video)
|
|
||||||
|
|
||||||
@website http://www.subtitleseeker.com
|
|
||||||
@provide-api no
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import quote_plus
|
|
||||||
from lxml import html
|
|
||||||
from searx.languages import language_codes
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos']
|
|
||||||
paging = True
|
|
||||||
language = ""
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'http://www.subtitleseeker.com/'
|
|
||||||
search_url = url + 'search/TITLES/{query}&p={pageno}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//div[@class="boxRows"]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=quote_plus(query),
|
|
||||||
pageno=params['pageno'])
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
search_lang = ""
|
|
||||||
|
|
||||||
if resp.search_params['language'] != 'all':
|
|
||||||
search_lang = [lc[1]
|
|
||||||
for lc in language_codes
|
|
||||||
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
link = result.xpath(".//a")[0]
|
|
||||||
href = link.attrib.get('href')
|
|
||||||
|
|
||||||
if language is not "":
|
|
||||||
href = href + language + '/'
|
|
||||||
elif search_lang:
|
|
||||||
href = href + search_lang + '/'
|
|
||||||
|
|
||||||
title = escape(extract_text(link))
|
|
||||||
|
|
||||||
content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
|
|
||||||
content = content + " - "
|
|
||||||
text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
|
|
||||||
content = content + text
|
|
||||||
|
|
||||||
if result.xpath(".//span") != []:
|
|
||||||
content = content +\
|
|
||||||
" - (" +\
|
|
||||||
extract_text(result.xpath(".//span")) +\
|
|
||||||
")"
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': href,
|
|
||||||
'title': title,
|
|
||||||
'content': escape(content)})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,109 +0,0 @@
|
||||||
"""
|
|
||||||
Swisscows (Web, Images)
|
|
||||||
|
|
||||||
@website https://swisscows.ch
|
|
||||||
@provide-api no
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode, unquote
|
|
||||||
import re
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general', 'images']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://swisscows.ch/'
|
|
||||||
search_string = '?{query}&page={page}'
|
|
||||||
|
|
||||||
# regex
|
|
||||||
regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
|
|
||||||
regex_json_remove_start = re.compile('^initialData:\s*')
|
|
||||||
regex_json_remove_end = re.compile(',\s*environment$')
|
|
||||||
regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
if params['language'] == 'all':
|
|
||||||
ui_language = 'browser'
|
|
||||||
region = 'browser'
|
|
||||||
else:
|
|
||||||
region = params['language'].replace('_', '-')
|
|
||||||
ui_language = params['language'].split('_')[0]
|
|
||||||
|
|
||||||
search_path = search_string.format(
|
|
||||||
query=urlencode({'query': query,
|
|
||||||
'uiLanguage': ui_language,
|
|
||||||
'region': region}),
|
|
||||||
page=params['pageno'])
|
|
||||||
|
|
||||||
# image search query is something like 'image?{query}&page={page}'
|
|
||||||
if params['category'] == 'images':
|
|
||||||
search_path = 'image' + search_path
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
json_regex = regex_json.search(resp.content)
|
|
||||||
|
|
||||||
# check if results are returned
|
|
||||||
if not json_regex:
|
|
||||||
return []
|
|
||||||
|
|
||||||
json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
|
|
||||||
json = loads(json_raw)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in json['Results'].get('items', []):
|
|
||||||
result_title = result['Title'].replace(u'\uE000', '').replace(u'\uE001', '')
|
|
||||||
|
|
||||||
# parse image results
|
|
||||||
if result.get('ContentType', '').startswith('image'):
|
|
||||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': result['SourceUrl'],
|
|
||||||
'title': escape(result['Title']),
|
|
||||||
'content': '',
|
|
||||||
'img_src': img_url,
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
# parse general results
|
|
||||||
else:
|
|
||||||
result_url = result['Url'].replace(u'\uE000', '').replace(u'\uE001', '')
|
|
||||||
result_content = result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': result_url,
|
|
||||||
'title': escape(result_title),
|
|
||||||
'content': escape(result_content)})
|
|
||||||
|
|
||||||
# parse images
|
|
||||||
for result in json.get('Images', []):
|
|
||||||
# decode image url
|
|
||||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': result['SourceUrl'],
|
|
||||||
'title': escape(result['Title']),
|
|
||||||
'content': '',
|
|
||||||
'img_src': img_url,
|
|
||||||
'template': 'images.html'})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,102 +0,0 @@
|
||||||
"""
|
|
||||||
Tokyo Toshokan (A BitTorrent Library for Japanese Media)
|
|
||||||
|
|
||||||
@website https://www.tokyotosho.info/
|
|
||||||
@provide-api no
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, publishedDate, seed, leech,
|
|
||||||
filesize, magnetlink, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from datetime import datetime
|
|
||||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files', 'videos', 'music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.tokyotosho.info/'
|
|
||||||
search_url = base_url + 'search.php?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
query = urlencode({'page': params['pageno'],
|
|
||||||
'terms': query})
|
|
||||||
params['url'] = search_url.format(query=query)
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
rows = dom.xpath('//table[@class="listing"]//tr[contains(@class, "category_0")]')
|
|
||||||
|
|
||||||
# check if there are no results or page layout was changed so we cannot parse it
|
|
||||||
# currently there are two rows for each result, so total count must be even
|
|
||||||
if len(rows) == 0 or len(rows) % 2 != 0:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# regular expression for parsing torrent size strings
|
|
||||||
size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
|
||||||
|
|
||||||
# processing the results, two rows at a time
|
|
||||||
for i in xrange(0, len(rows), 2):
|
|
||||||
# parse the first row
|
|
||||||
name_row = rows[i]
|
|
||||||
|
|
||||||
links = name_row.xpath('./td[@class="desc-top"]/a')
|
|
||||||
params = {
|
|
||||||
'template': 'torrent.html',
|
|
||||||
'url': links[-1].attrib.get('href'),
|
|
||||||
'title': extract_text(links[-1])
|
|
||||||
}
|
|
||||||
# I have not yet seen any torrents without magnet links, but
|
|
||||||
# it's better to be prepared to stumble upon one some day
|
|
||||||
if len(links) == 2:
|
|
||||||
magnet = links[0].attrib.get('href')
|
|
||||||
if magnet.startswith('magnet'):
|
|
||||||
# okay, we have a valid magnet link, let's add it to the result
|
|
||||||
params['magnetlink'] = magnet
|
|
||||||
|
|
||||||
# no more info in the first row, start parsing the second one
|
|
||||||
info_row = rows[i + 1]
|
|
||||||
desc = extract_text(info_row.xpath('./td[@class="desc-bot"]')[0])
|
|
||||||
for item in desc.split('|'):
|
|
||||||
item = item.strip()
|
|
||||||
if item.startswith('Size:'):
|
|
||||||
try:
|
|
||||||
# ('1.228', 'GB')
|
|
||||||
groups = size_re.match(item).groups()
|
|
||||||
multiplier = get_filesize_mul(groups[1])
|
|
||||||
params['filesize'] = int(multiplier * float(groups[0]))
|
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
elif item.startswith('Date:'):
|
|
||||||
try:
|
|
||||||
# Date: 2016-02-21 21:44 UTC
|
|
||||||
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
|
|
||||||
params['publishedDate'] = date
|
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
elif item.startswith('Comment:'):
|
|
||||||
params['content'] = item
|
|
||||||
stats = info_row.xpath('./td[@class="stats"]/span')
|
|
||||||
# has the layout not changed yet?
|
|
||||||
if len(stats) == 3:
|
|
||||||
params['seed'] = int_or_zero(extract_text(stats[0]))
|
|
||||||
params['leech'] = int_or_zero(extract_text(stats[1]))
|
|
||||||
|
|
||||||
results.append(params)
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,93 +0,0 @@
|
||||||
"""
|
|
||||||
Torrentz.eu (BitTorrent meta-search engine)
|
|
||||||
|
|
||||||
@website https://torrentz.eu/
|
|
||||||
@provide-api no
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change, although unlikely,
|
|
||||||
see https://torrentz.eu/torrentz.btsearch)
|
|
||||||
@parse url, title, publishedDate, seed, leech, filesize, magnetlink
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from datetime import datetime
|
|
||||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files', 'videos', 'music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
# https://torrentz.eu/search?f=EXAMPLE&p=6
|
|
||||||
base_url = 'https://torrentz.eu/'
|
|
||||||
search_url = base_url + 'search?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
page = params['pageno'] - 1
|
|
||||||
query = urlencode({'q': query, 'p': page})
|
|
||||||
params['url'] = search_url.format(query=query)
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
for result in dom.xpath('//div[@class="results"]/dl'):
|
|
||||||
name_cell = result.xpath('./dt')[0]
|
|
||||||
title = extract_text(name_cell)
|
|
||||||
|
|
||||||
# skip rows that do not contain a link to a torrent
|
|
||||||
links = name_cell.xpath('./a')
|
|
||||||
if len(links) != 1:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# extract url and remove a slash in the beginning
|
|
||||||
link = links[0].attrib.get('href').lstrip('/')
|
|
||||||
|
|
||||||
seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '')
|
|
||||||
leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '')
|
|
||||||
|
|
||||||
params = {
|
|
||||||
'url': base_url + link,
|
|
||||||
'title': title,
|
|
||||||
'seed': int_or_zero(seed),
|
|
||||||
'leech': int_or_zero(leech),
|
|
||||||
'template': 'torrent.html'
|
|
||||||
}
|
|
||||||
|
|
||||||
# let's try to calculate the torrent size
|
|
||||||
try:
|
|
||||||
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
|
|
||||||
size, suffix = size_str.split()
|
|
||||||
params['filesize'] = int(size) * get_filesize_mul(suffix)
|
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# does our link contain a valid SHA1 sum?
|
|
||||||
if re.compile('[0-9a-fA-F]{40}').match(link):
|
|
||||||
# add a magnet link to the result
|
|
||||||
params['magnetlink'] = 'magnet:?xt=urn:btih:' + link
|
|
||||||
|
|
||||||
# extract and convert creation date
|
|
||||||
try:
|
|
||||||
date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title')
|
|
||||||
# Fri, 25 Mar 2016 16:29:01
|
|
||||||
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
|
|
||||||
params['publishedDate'] = date
|
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
|
|
||||||
results.append(params)
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,83 +0,0 @@
|
||||||
"""
|
|
||||||
Twitter (Social media)
|
|
||||||
|
|
||||||
@website https://twitter.com/
|
|
||||||
@provide-api yes (https://dev.twitter.com/docs/using-search)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
|
|
||||||
@todo publishedDate
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from datetime import datetime
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['social media']
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://twitter.com/'
|
|
||||||
search_url = base_url + 'search?'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//li[@data-item-type="tweet"]'
|
|
||||||
link_xpath = './/small[@class="time"]//a'
|
|
||||||
title_xpath = './/span[contains(@class, "username")]'
|
|
||||||
content_xpath = './/p[contains(@class, "tweet-text")]'
|
|
||||||
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url + urlencode({'q': query})
|
|
||||||
|
|
||||||
# set language if specified
|
|
||||||
if params['language'] != 'all':
|
|
||||||
params['cookies']['lang'] = params['language'].split('_')[0]
|
|
||||||
else:
|
|
||||||
params['cookies']['lang'] = 'en'
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for tweet in dom.xpath(results_xpath):
|
|
||||||
try:
|
|
||||||
link = tweet.xpath(link_xpath)[0]
|
|
||||||
content = extract_text(tweet.xpath(content_xpath)[0])
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = urljoin(base_url, link.attrib.get('href'))
|
|
||||||
title = extract_text(tweet.xpath(title_xpath))
|
|
||||||
|
|
||||||
pubdate = tweet.xpath(timestamp_xpath)
|
|
||||||
if len(pubdate) > 0:
|
|
||||||
timestamp = float(pubdate[0].attrib.get('data-time'))
|
|
||||||
publishedDate = datetime.fromtimestamp(timestamp, None)
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'publishedDate': publishedDate})
|
|
||||||
else:
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,75 +0,0 @@
|
||||||
# Vimeo (Videos)
|
|
||||||
#
|
|
||||||
# @website https://vimeo.com/
|
|
||||||
# @provide-api yes (http://developer.vimeo.com/api),
|
|
||||||
# they have a maximum count of queries/hour
|
|
||||||
#
|
|
||||||
# @using-api no (TODO, rewrite to api)
|
|
||||||
# @results HTML (using search portal)
|
|
||||||
# @stable no (HTML can change)
|
|
||||||
# @parse url, title, publishedDate, thumbnail, embedded
|
|
||||||
#
|
|
||||||
# @todo rewrite to api
|
|
||||||
# @todo set content-parameter with correct data
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://vimeo.com'
|
|
||||||
search_url = base_url + '/search/page:{pageno}?{query}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//div[contains(@class,"results_grid")]/ul/li'
|
|
||||||
url_xpath = './/a/@href'
|
|
||||||
title_xpath = './/span[@class="title"]'
|
|
||||||
thumbnail_xpath = './/img[@class="js-clip_thumbnail_image"]/@src'
|
|
||||||
publishedDate_xpath = './/time/attribute::datetime'
|
|
||||||
|
|
||||||
embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
|
|
||||||
'width="540" height="304" frameborder="0" ' +\
|
|
||||||
'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(pageno=params['pageno'],
|
|
||||||
query=urlencode({'q': query}))
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
p = HTMLParser()
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
videoid = result.xpath(url_xpath)[0]
|
|
||||||
url = base_url + videoid
|
|
||||||
title = p.unescape(extract_text(result.xpath(title_xpath)))
|
|
||||||
thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
|
|
||||||
publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
|
|
||||||
embedded = embedded_url.format(videoid=videoid)
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': '',
|
|
||||||
'template': 'videos.html',
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'embedded': embedded,
|
|
||||||
'thumbnail': thumbnail})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,323 +0,0 @@
|
||||||
import json
|
|
||||||
|
|
||||||
from searx import logger
|
|
||||||
from searx.poolrequests import get
|
|
||||||
from searx.utils import format_date_by_locale
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from dateutil.parser import parse as dateutil_parse
|
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('wikidata')
|
|
||||||
result_count = 1
|
|
||||||
wikidata_host = 'https://www.wikidata.org'
|
|
||||||
wikidata_api = wikidata_host + '/w/api.php'
|
|
||||||
url_search = wikidata_api \
|
|
||||||
+ '?action=query&list=search&format=json'\
|
|
||||||
+ '&srnamespace=0&srprop=sectiontitle&{query}'
|
|
||||||
url_detail = wikidata_api\
|
|
||||||
+ '?action=wbgetentities&format=json'\
|
|
||||||
+ '&props=labels%7Cinfo%7Csitelinks'\
|
|
||||||
+ '%7Csitelinks%2Furls%7Cdescriptions%7Cclaims'\
|
|
||||||
+ '&{query}'
|
|
||||||
url_map = 'https://www.openstreetmap.org/'\
|
|
||||||
+ '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = url_search.format(
|
|
||||||
query=urlencode({'srsearch': query,
|
|
||||||
'srlimit': result_count}))
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
search_res = json.loads(resp.text)
|
|
||||||
|
|
||||||
wikidata_ids = set()
|
|
||||||
for r in search_res.get('query', {}).get('search', {}):
|
|
||||||
wikidata_ids.add(r.get('title', ''))
|
|
||||||
|
|
||||||
language = resp.search_params['language'].split('_')[0]
|
|
||||||
if language == 'all':
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
|
|
||||||
'languages': language + '|en'}))
|
|
||||||
|
|
||||||
htmlresponse = get(url)
|
|
||||||
jsonresponse = json.loads(htmlresponse.content)
|
|
||||||
for wikidata_id in wikidata_ids:
|
|
||||||
results = results + getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def getDetail(jsonresponse, wikidata_id, language, locale):
|
|
||||||
results = []
|
|
||||||
urls = []
|
|
||||||
attributes = []
|
|
||||||
|
|
||||||
result = jsonresponse.get('entities', {}).get(wikidata_id, {})
|
|
||||||
|
|
||||||
title = result.get('labels', {}).get(language, {}).get('value', None)
|
|
||||||
if title is None:
|
|
||||||
title = result.get('labels', {}).get('en', {}).get('value', None)
|
|
||||||
if title is None:
|
|
||||||
return results
|
|
||||||
|
|
||||||
description = result\
|
|
||||||
.get('descriptions', {})\
|
|
||||||
.get(language, {})\
|
|
||||||
.get('value', None)
|
|
||||||
|
|
||||||
if description is None:
|
|
||||||
description = result\
|
|
||||||
.get('descriptions', {})\
|
|
||||||
.get('en', {})\
|
|
||||||
.get('value', '')
|
|
||||||
|
|
||||||
claims = result.get('claims', {})
|
|
||||||
official_website = get_string(claims, 'P856', None)
|
|
||||||
if official_website is not None:
|
|
||||||
urls.append({'title': 'Official site', 'url': official_website})
|
|
||||||
results.append({'title': title, 'url': official_website})
|
|
||||||
|
|
||||||
wikipedia_link_count = 0
|
|
||||||
wikipedia_link = get_wikilink(result, language + 'wiki')
|
|
||||||
wikipedia_link_count += add_url(urls,
|
|
||||||
'Wikipedia (' + language + ')',
|
|
||||||
wikipedia_link)
|
|
||||||
if language != 'en':
|
|
||||||
wikipedia_en_link = get_wikilink(result, 'enwiki')
|
|
||||||
wikipedia_link_count += add_url(urls,
|
|
||||||
'Wikipedia (en)',
|
|
||||||
wikipedia_en_link)
|
|
||||||
if wikipedia_link_count == 0:
|
|
||||||
misc_language = get_wiki_firstlanguage(result, 'wiki')
|
|
||||||
if misc_language is not None:
|
|
||||||
add_url(urls,
|
|
||||||
'Wikipedia (' + misc_language + ')',
|
|
||||||
get_wikilink(result, misc_language + 'wiki'))
|
|
||||||
|
|
||||||
if language != 'en':
|
|
||||||
add_url(urls,
|
|
||||||
'Wiki voyage (' + language + ')',
|
|
||||||
get_wikilink(result, language + 'wikivoyage'))
|
|
||||||
|
|
||||||
add_url(urls,
|
|
||||||
'Wiki voyage (en)',
|
|
||||||
get_wikilink(result, 'enwikivoyage'))
|
|
||||||
|
|
||||||
if language != 'en':
|
|
||||||
add_url(urls,
|
|
||||||
'Wikiquote (' + language + ')',
|
|
||||||
get_wikilink(result, language + 'wikiquote'))
|
|
||||||
|
|
||||||
add_url(urls,
|
|
||||||
'Wikiquote (en)',
|
|
||||||
get_wikilink(result, 'enwikiquote'))
|
|
||||||
|
|
||||||
add_url(urls,
|
|
||||||
'Commons wiki',
|
|
||||||
get_wikilink(result, 'commonswiki'))
|
|
||||||
|
|
||||||
add_url(urls,
|
|
||||||
'Location',
|
|
||||||
get_geolink(claims, 'P625', None))
|
|
||||||
|
|
||||||
add_url(urls,
|
|
||||||
'Wikidata',
|
|
||||||
'https://www.wikidata.org/wiki/'
|
|
||||||
+ wikidata_id + '?uselang=' + language)
|
|
||||||
|
|
||||||
musicbrainz_work_id = get_string(claims, 'P435')
|
|
||||||
if musicbrainz_work_id is not None:
|
|
||||||
add_url(urls,
|
|
||||||
'MusicBrainz',
|
|
||||||
'http://musicbrainz.org/work/'
|
|
||||||
+ musicbrainz_work_id)
|
|
||||||
|
|
||||||
musicbrainz_artist_id = get_string(claims, 'P434')
|
|
||||||
if musicbrainz_artist_id is not None:
|
|
||||||
add_url(urls,
|
|
||||||
'MusicBrainz',
|
|
||||||
'http://musicbrainz.org/artist/'
|
|
||||||
+ musicbrainz_artist_id)
|
|
||||||
|
|
||||||
musicbrainz_release_group_id = get_string(claims, 'P436')
|
|
||||||
if musicbrainz_release_group_id is not None:
|
|
||||||
add_url(urls,
|
|
||||||
'MusicBrainz',
|
|
||||||
'http://musicbrainz.org/release-group/'
|
|
||||||
+ musicbrainz_release_group_id)
|
|
||||||
|
|
||||||
musicbrainz_label_id = get_string(claims, 'P966')
|
|
||||||
if musicbrainz_label_id is not None:
|
|
||||||
add_url(urls,
|
|
||||||
'MusicBrainz',
|
|
||||||
'http://musicbrainz.org/label/'
|
|
||||||
+ musicbrainz_label_id)
|
|
||||||
|
|
||||||
# musicbrainz_area_id = get_string(claims, 'P982')
|
|
||||||
# P1407 MusicBrainz series ID
|
|
||||||
# P1004 MusicBrainz place ID
|
|
||||||
# P1330 MusicBrainz instrument ID
|
|
||||||
# P1407 MusicBrainz series ID
|
|
||||||
|
|
||||||
postal_code = get_string(claims, 'P281', None)
|
|
||||||
if postal_code is not None:
|
|
||||||
attributes.append({'label': 'Postal code(s)', 'value': postal_code})
|
|
||||||
|
|
||||||
date_of_birth = get_time(claims, 'P569', locale, None)
|
|
||||||
if date_of_birth is not None:
|
|
||||||
attributes.append({'label': 'Date of birth', 'value': date_of_birth})
|
|
||||||
|
|
||||||
date_of_death = get_time(claims, 'P570', locale, None)
|
|
||||||
if date_of_death is not None:
|
|
||||||
attributes.append({'label': 'Date of death', 'value': date_of_death})
|
|
||||||
|
|
||||||
if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
|
|
||||||
results.append({
|
|
||||||
'url': urls[0]['url'],
|
|
||||||
'title': title,
|
|
||||||
'content': description
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
results.append({
|
|
||||||
'infobox': title,
|
|
||||||
'id': wikipedia_link,
|
|
||||||
'content': description,
|
|
||||||
'attributes': attributes,
|
|
||||||
'urls': urls
|
|
||||||
})
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def add_url(urls, title, url):
|
|
||||||
if url is not None:
|
|
||||||
urls.append({'title': title, 'url': url})
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def get_mainsnak(claims, propertyName):
|
|
||||||
propValue = claims.get(propertyName, {})
|
|
||||||
if len(propValue) == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
propValue = propValue[0].get('mainsnak', None)
|
|
||||||
return propValue
|
|
||||||
|
|
||||||
|
|
||||||
def get_string(claims, propertyName, defaultValue=None):
|
|
||||||
propValue = claims.get(propertyName, {})
|
|
||||||
if len(propValue) == 0:
|
|
||||||
return defaultValue
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for e in propValue:
|
|
||||||
mainsnak = e.get('mainsnak', {})
|
|
||||||
|
|
||||||
datavalue = mainsnak.get('datavalue', {})
|
|
||||||
if datavalue is not None:
|
|
||||||
result.append(datavalue.get('value', ''))
|
|
||||||
|
|
||||||
if len(result) == 0:
|
|
||||||
return defaultValue
|
|
||||||
else:
|
|
||||||
# TODO handle multiple urls
|
|
||||||
return result[0]
|
|
||||||
|
|
||||||
|
|
||||||
def get_time(claims, propertyName, locale, defaultValue=None):
|
|
||||||
propValue = claims.get(propertyName, {})
|
|
||||||
if len(propValue) == 0:
|
|
||||||
return defaultValue
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for e in propValue:
|
|
||||||
mainsnak = e.get('mainsnak', {})
|
|
||||||
|
|
||||||
datavalue = mainsnak.get('datavalue', {})
|
|
||||||
if datavalue is not None:
|
|
||||||
value = datavalue.get('value', '')
|
|
||||||
result.append(value.get('time', ''))
|
|
||||||
|
|
||||||
if len(result) == 0:
|
|
||||||
date_string = defaultValue
|
|
||||||
else:
|
|
||||||
date_string = ', '.join(result)
|
|
||||||
|
|
||||||
try:
|
|
||||||
parsed_date = datetime.strptime(date_string, "+%Y-%m-%dT%H:%M:%SZ")
|
|
||||||
except:
|
|
||||||
if date_string.startswith('-'):
|
|
||||||
return date_string.split('T')[0]
|
|
||||||
try:
|
|
||||||
parsed_date = dateutil_parse(date_string, fuzzy=False, default=False)
|
|
||||||
except:
|
|
||||||
logger.debug('could not parse date %s', date_string)
|
|
||||||
return date_string.split('T')[0]
|
|
||||||
|
|
||||||
return format_date_by_locale(parsed_date, locale)
|
|
||||||
|
|
||||||
|
|
||||||
def get_geolink(claims, propertyName, defaultValue=''):
|
|
||||||
mainsnak = get_mainsnak(claims, propertyName)
|
|
||||||
|
|
||||||
if mainsnak is None:
|
|
||||||
return defaultValue
|
|
||||||
|
|
||||||
datatype = mainsnak.get('datatype', '')
|
|
||||||
datavalue = mainsnak.get('datavalue', {})
|
|
||||||
|
|
||||||
if datatype != 'globe-coordinate':
|
|
||||||
return defaultValue
|
|
||||||
|
|
||||||
value = datavalue.get('value', {})
|
|
||||||
|
|
||||||
precision = value.get('precision', 0.0002)
|
|
||||||
|
|
||||||
# there is no zoom information, deduce from precision (error prone)
|
|
||||||
# samples :
|
|
||||||
# 13 --> 5
|
|
||||||
# 1 --> 6
|
|
||||||
# 0.016666666666667 --> 9
|
|
||||||
# 0.00027777777777778 --> 19
|
|
||||||
# wolframalpha :
|
|
||||||
# quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
|
|
||||||
# 14.1186-8.8322 x+0.625447 x^2
|
|
||||||
if precision < 0.0003:
|
|
||||||
zoom = 19
|
|
||||||
else:
|
|
||||||
zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
|
|
||||||
|
|
||||||
url = url_map\
|
|
||||||
.replace('{latitude}', str(value.get('latitude', 0)))\
|
|
||||||
.replace('{longitude}', str(value.get('longitude', 0)))\
|
|
||||||
.replace('{zoom}', str(zoom))
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def get_wikilink(result, wikiid):
|
|
||||||
url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
|
|
||||||
if url is None:
|
|
||||||
return url
|
|
||||||
elif url.startswith('http://'):
|
|
||||||
url = url.replace('http://', 'https://')
|
|
||||||
elif url.startswith('//'):
|
|
||||||
url = 'https:' + url
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def get_wiki_firstlanguage(result, wikipatternid):
|
|
||||||
for k in result.get('sitelinks', {}).keys():
|
|
||||||
if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)):
|
|
||||||
return k[0:2]
|
|
||||||
return None
|
|
|
@ -1,114 +0,0 @@
|
||||||
"""
|
|
||||||
Wikipedia (Web)
|
|
||||||
|
|
||||||
@website https://{language}.wikipedia.org
|
|
||||||
@provide-api yes
|
|
||||||
|
|
||||||
@using-api yes
|
|
||||||
@results JSON
|
|
||||||
@stable yes
|
|
||||||
@parse url, infobox
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode, quote
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://{language}.wikipedia.org/'
|
|
||||||
search_postfix = 'w/api.php?'\
|
|
||||||
'action=query'\
|
|
||||||
'&format=json'\
|
|
||||||
'&{query}'\
|
|
||||||
'&prop=extracts|pageimages'\
|
|
||||||
'&exintro'\
|
|
||||||
'&explaintext'\
|
|
||||||
'&pithumbsize=300'\
|
|
||||||
'&redirects'
|
|
||||||
|
|
||||||
|
|
||||||
# set language in base_url
|
|
||||||
def url_lang(lang):
|
|
||||||
if lang == 'all':
|
|
||||||
language = 'en'
|
|
||||||
else:
|
|
||||||
language = lang.split('_')[0]
|
|
||||||
|
|
||||||
return base_url.format(language=language)
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
if query.islower():
|
|
||||||
query += '|' + query.title()
|
|
||||||
|
|
||||||
params['url'] = url_lang(params['language']) \
|
|
||||||
+ search_postfix.format(query=urlencode({'titles': query}))
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get first meaningful paragraph
|
|
||||||
# this should filter out disambiguation pages and notes above first paragraph
|
|
||||||
# "magic numbers" were obtained by fine tuning
|
|
||||||
def extract_first_paragraph(content, title, image):
|
|
||||||
first_paragraph = None
|
|
||||||
|
|
||||||
failed_attempts = 0
|
|
||||||
for paragraph in content.split('\n'):
|
|
||||||
|
|
||||||
starts_with_title = paragraph.lower().find(title.lower(), 0, len(title) + 35)
|
|
||||||
length = len(paragraph)
|
|
||||||
|
|
||||||
if length >= 200 or (starts_with_title >= 0 and (image or length >= 150)):
|
|
||||||
first_paragraph = paragraph
|
|
||||||
break
|
|
||||||
|
|
||||||
failed_attempts += 1
|
|
||||||
if failed_attempts > 3:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return first_paragraph
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
search_result = loads(resp.content)
|
|
||||||
|
|
||||||
# wikipedia article's unique id
|
|
||||||
# first valid id is assumed to be the requested article
|
|
||||||
for article_id in search_result['query']['pages']:
|
|
||||||
page = search_result['query']['pages'][article_id]
|
|
||||||
if int(article_id) > 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
if int(article_id) < 0:
|
|
||||||
return []
|
|
||||||
|
|
||||||
title = page.get('title')
|
|
||||||
|
|
||||||
image = page.get('thumbnail')
|
|
||||||
if image:
|
|
||||||
image = image.get('source')
|
|
||||||
|
|
||||||
extract = page.get('extract')
|
|
||||||
|
|
||||||
summary = extract_first_paragraph(extract, title, image)
|
|
||||||
if not summary:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# link to wikipedia article
|
|
||||||
# parenthesis are not quoted to make infobox mergeable with wikidata's
|
|
||||||
wikipedia_link = url_lang(resp.search_params['language']) \
|
|
||||||
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8')).replace('%28', '(').replace('%29', ')')
|
|
||||||
|
|
||||||
results.append({'url': wikipedia_link, 'title': title})
|
|
||||||
|
|
||||||
results.append({'infobox': title,
|
|
||||||
'id': wikipedia_link,
|
|
||||||
'content': summary,
|
|
||||||
'img_src': image,
|
|
||||||
'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
|
|
||||||
|
|
||||||
return results
|
|
|
@ -1,122 +0,0 @@
|
||||||
# Wolfram Alpha (Science)
|
|
||||||
#
|
|
||||||
# @website https://www.wolframalpha.com
|
|
||||||
# @provide-api yes (https://api.wolframalpha.com/v2/)
|
|
||||||
#
|
|
||||||
# @using-api yes
|
|
||||||
# @results XML
|
|
||||||
# @stable yes
|
|
||||||
# @parse url, infobox
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
|
|
||||||
site_url = 'https://www.wolframalpha.com/input/?{query}'
|
|
||||||
api_key = '' # defined in settings.yml
|
|
||||||
|
|
||||||
# xpath variables
|
|
||||||
failure_xpath = '/queryresult[attribute::success="false"]'
|
|
||||||
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
|
|
||||||
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
|
|
||||||
pods_xpath = '//pod'
|
|
||||||
subpods_xpath = './subpod'
|
|
||||||
pod_id_xpath = './@id'
|
|
||||||
pod_title_xpath = './@title'
|
|
||||||
plaintext_xpath = './plaintext'
|
|
||||||
image_xpath = './img'
|
|
||||||
img_src_xpath = './@src'
|
|
||||||
img_alt_xpath = './@alt'
|
|
||||||
|
|
||||||
# pods to display as image in infobox
|
|
||||||
# this pods do return a plaintext, but they look better and are more useful as images
|
|
||||||
image_pods = {'VisualRepresentation',
|
|
||||||
'Illustration'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the Wolfram|Alpha v2 API request URL and Referer header."""
    # The API endpoint and the human-facing site use different parameter
    # names ('input' vs. 'i') for the same search terms.
    api_query = urlencode({'input': query})
    params['url'] = search_url.format(query=api_query, api_key=api_key)

    site_query = urlencode({'i': query})
    params['headers']['Referer'] = site_url.format(query=site_query)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# replace private user area characters to make text legible
|
|
||||||
def replace_pua_chars(text):
    """Replace Wolfram|Alpha's private-use-area glyphs with readable text.

    Wolfram|Alpha encodes some mathematical symbols in the Unicode private
    use area (U+F522 etc.); substitute standard characters so the plaintext
    renders legibly.
    """
    pua_chars = {u'\uf522': u'\u2192',  # right arrow
                 u'\uf7b1': u'\u2115',  # set of natural numbers
                 u'\uf7b4': u'\u211a',  # set of rational numbers
                 u'\uf7b5': u'\u211d',  # set of real numbers
                 u'\uf7bd': u'\u2124',  # set of integer numbers
                 u'\uf74c': 'd',        # differential
                 u'\uf74d': u'\u212f',  # euler's number
                 u'\uf74e': 'i',        # imaginary number
                 u'\uf7d9': '='}        # equals sign

    # .items() instead of the Python-2-only .iteritems(): identical
    # iteration behavior in Python 2 and also valid on Python 3
    for pua, replacement in pua_chars.items():
        text = text.replace(pua, replacement)

    return text
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse the Wolfram|Alpha v2 XML response into searx result dicts.

    Builds one infobox result (title + attribute chunks) plus one plain
    link result pointing at the interactive site.
    """
    results = []

    search_results = etree.XML(resp.content)

    # return empty array if there are no results
    if search_results.xpath(failure_xpath):
        return []

    try:
        infobox_title = search_results.xpath(input_xpath)[0].text
    except:
        # the Input pod is sometimes missing; fall back to the first
        # plaintext pod found below
        infobox_title = None

    pods = search_results.xpath(pods_xpath)
    result_chunks = []
    for pod in pods:
        pod_id = pod.xpath(pod_id_xpath)[0]
        pod_title = pod.xpath(pod_title_xpath)[0]

        subpods = pod.xpath(subpods_xpath)
        if not subpods:
            continue

        # Appends either a text or an image, depending on which one is more suitable
        for subpod in subpods:
            content = subpod.xpath(plaintext_xpath)[0].text
            image = subpod.xpath(image_xpath)

            # pods listed in image_pods look better as images even though
            # they carry plaintext
            if content and pod_id not in image_pods:

                # if no input pod was found, title is first plaintext pod
                if not infobox_title:
                    infobox_title = content

                content = replace_pua_chars(content)
                result_chunks.append({'label': pod_title, 'value': content})

            elif image:
                result_chunks.append({'label': pod_title,
                                      'image': {'src': image[0].xpath(img_src_xpath)[0],
                                                'alt': image[0].xpath(img_alt_xpath)[0]}})

    if not result_chunks:
        return []

    # append infobox
    results.append({'infobox': infobox_title,
                    'attributes': result_chunks,
                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})

    # append link to site
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
                    'title': 'Wolfram|Alpha',
                    'content': infobox_title})

    return results
|
|
|
@ -1,116 +0,0 @@
|
||||||
# Wolfram|Alpha (Science)
|
|
||||||
#
|
|
||||||
# @website https://www.wolframalpha.com/
|
|
||||||
# @provide-api yes (https://api.wolframalpha.com/v2/)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results JSON
|
|
||||||
# @stable no
|
|
||||||
# @parse url, infobox
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from json import loads
|
|
||||||
from time import time
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.etree import XML
|
|
||||||
|
|
||||||
from searx.poolrequests import get as http_get
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://www.wolframalpha.com/'
|
|
||||||
|
|
||||||
search_url = url + 'input/json.jsp'\
|
|
||||||
'?async=false'\
|
|
||||||
'&banners=raw'\
|
|
||||||
'&debuggingdata=false'\
|
|
||||||
'&format=image,plaintext,imagemap,minput,moutput'\
|
|
||||||
'&formattimeout=2'\
|
|
||||||
'&{query}'\
|
|
||||||
'&output=JSON'\
|
|
||||||
'&parsetimeout=2'\
|
|
||||||
'&proxycode={token}'\
|
|
||||||
'&scantimeout=0.5'\
|
|
||||||
'&sponsorcategories=true'\
|
|
||||||
'&statemethod=deploybutton'
|
|
||||||
|
|
||||||
referer_url = url + 'input/?{query}'
|
|
||||||
|
|
||||||
token = {'value': '',
|
|
||||||
'last_updated': None}
|
|
||||||
|
|
||||||
# pods to display as image in infobox
|
|
||||||
# this pods do return a plaintext, but they look better and are more useful as images
|
|
||||||
image_pods = {'VisualRepresentation',
|
|
||||||
'Illustration',
|
|
||||||
'Symbol'}
|
|
||||||
|
|
||||||
|
|
||||||
# seems, wolframalpha resets its token in every hour
|
|
||||||
# seems, wolframalpha resets its token in every hour
def obtain_token():
    """Fetch a fresh proxy token from wolframalpha.com (best effort).

    Updates the module-level ``token`` dict in place and returns it.
    """
    # quantize to the start of the current hour so the staleness check
    # lines up with the server's apparent hourly token rotation
    update_time = time() - (time() % 3600)
    try:
        token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
        token['value'] = loads(token_response.text)['code']
        token['last_updated'] = update_time
    except:
        # best effort: on any network/JSON failure keep the previous
        # token values (NOTE(review): token['last_updated'] stays None
        # if this very first fetch fails)
        pass
    return token


# fetch an initial token at import time
obtain_token()
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the search request, refreshing the proxy token when stale.

    The token rotates roughly hourly on the server side; it is re-fetched
    here when more than an hour old, or when the import-time fetch failed.
    """
    # obtain token if last update was more than an hour ago; also handle
    # a failed import-time fetch that left last_updated as None — the
    # original comparison would raise TypeError on `time() - None`
    if token['last_updated'] is None or time() - token['last_updated'] > 3600:
        obtain_token()
    params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
    params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse the (undocumented) input/json.jsp payload into searx results.

    Emits one infobox result plus one plain link back to the site.
    """
    results = []

    resp_json = loads(resp.text)

    if not resp_json['queryresult']['success']:
        return []

    # TODO handle resp_json['queryresult']['assumptions']
    result_chunks = []
    infobox_title = None
    for pod in resp_json['queryresult']['pods']:
        pod_id = pod.get('id', '')
        pod_title = pod.get('title', '')

        if 'subpods' not in pod:
            continue

        # the Input pod (or, failing that, the first pod seen) names the infobox
        if pod_id == 'Input' or not infobox_title:
            infobox_title = pod['subpods'][0]['plaintext']

        for subpod in pod['subpods']:
            # pods listed in image_pods look better as images even when
            # they carry plaintext
            if subpod['plaintext'] != '' and pod_id not in image_pods:
                # append unless it's not an actual answer
                if subpod['plaintext'] != '(requires interactivity)':
                    result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})

            elif 'img' in subpod:
                result_chunks.append({'label': pod_title, 'image': subpod['img']})

    if not result_chunks:
        return []

    results.append({'infobox': infobox_title,
                    'attributes': result_chunks,
                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})

    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
                    'title': 'Wolfram|Alpha',
                    'content': infobox_title})

    return results
|
|
|
@ -1,83 +0,0 @@
|
||||||
"""
|
|
||||||
1x (Images)
|
|
||||||
|
|
||||||
@website http://1x.com/
|
|
||||||
@provide-api no
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, thumbnail, img_src, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urljoin
|
|
||||||
from lxml import html
|
|
||||||
import string
|
|
||||||
import re
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = False
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://1x.com'
|
|
||||||
search_url = base_url + '/backend/search.php?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Fill in the 1x.com search URL for the given query."""
    encoded_query = urlencode({'q': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Extract image results by re-assembling each <a>...</a> fragment.

    1x.com returns a pseudo-XML payload; split it on anchor tags,
    reconstruct each link element and parse it individually with lxml.
    """
    results = []

    # get links from result-text
    regex = re.compile('(</a>|<a)')
    results_parts = re.split(regex, resp.text)

    cur_element = ''

    # iterate over link parts
    for result_part in results_parts:
        # processed start and end of link
        if result_part == '<a':
            cur_element = result_part
            continue
        elif result_part != '</a>':
            cur_element += result_part
            continue

        cur_element += result_part

        # fix xml-error (self-close the <img> tag so lxml accepts it);
        # str method instead of the deprecated string.replace() function
        cur_element = cur_element.replace('"></a>', '"/></a>')

        dom = html.fromstring(cur_element)
        link = dom.xpath('//a')[0]

        url = urljoin(base_url, link.attrib.get('href'))
        title = link.attrib.get('title', '')

        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # check if url is showing to a photo
        if '/photo/' not in url:
            continue

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': '',
                        'thumbnail_src': thumbnail_src,
                        'template': 'images.html'})

    # return results
    return results
|
|
|
@ -1,66 +0,0 @@
|
||||||
"""
|
|
||||||
500px (Images)
|
|
||||||
|
|
||||||
@website https://500px.com
|
|
||||||
@provide-api yes (https://developers.500px.com/)
|
|
||||||
|
|
||||||
@using-api no
|
|
||||||
@results HTML
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, thumbnail, img_src, content
|
|
||||||
|
|
||||||
@todo rewrite to api
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urljoin
|
|
||||||
from lxml import html
|
|
||||||
import re
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://500px.com'
|
|
||||||
search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the paged 500px search URL for the given query."""
    query_string = urlencode({'q': query})
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=query_string)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Scrape photo results from the 500px search page HTML."""
    results = []

    dom = html.fromstring(resp.text)
    # matches the thumbnail-size suffix ('3.jpg...') of the image URL,
    # swapped below for a larger variant
    regex = re.compile('3\.jpg.*$')

    # parse results
    for result in dom.xpath('//div[@class="photo"]'):
        link = result.xpath('.//a')[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = extract_text(result.xpath('.//div[@class="title"]'))
        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
        # To have a bigger thumbnail, uncomment the next line
        # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
        content = extract_text(result.xpath('.//div[@class="info"]'))
        img_src = regex.sub('2048.jpg', thumbnail_src)

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': content,
                        'thumbnail_src': thumbnail_src,
                        'template': 'images.html'})

    # return results
    return results
|
|
|
@ -1,120 +0,0 @@
|
||||||
from lxml import html
|
|
||||||
from urllib import urlencode, unquote
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
|
|
||||||
from searx.utils import html_to_text
|
|
||||||
|
|
||||||
search_url = None
|
|
||||||
url_xpath = None
|
|
||||||
content_xpath = None
|
|
||||||
title_xpath = None
|
|
||||||
suggestion_xpath = ''
|
|
||||||
results_xpath = ''
|
|
||||||
|
|
||||||
# parameters for engines with paging support
|
|
||||||
#
|
|
||||||
# number of results on each page
|
|
||||||
# (only needed if the site requires not a page number, but an offset)
|
|
||||||
page_size = 1
|
|
||||||
# number of the first page (usually 0 or 1)
|
|
||||||
first_page_num = 1
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
|
||||||
if xpath_results is list, extract the text from each result and concat the list
|
|
||||||
if xpath_results is a xml element, extract all the text node from it
|
|
||||||
( text_content() method from lxml )
|
|
||||||
if xpath_results is a string element, then it's already done
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def extract_text(xpath_results):
    """Return the concatenated, stripped text of an lxml xpath result.

    Accepts a list of results (concatenated via recursion), an lxml
    string result, or an element (text_content() run through
    html_to_text). Raises on an empty result list.
    """
    if type(xpath_results) == list:
        # it's list of result : concat everything using recursive call
        if not xpath_results:
            raise Exception('Empty url resultset')
        result = ''
        for e in xpath_results:
            result = result + extract_text(e)
        return result.strip()
    elif type(xpath_results) in [_ElementStringResult, _ElementUnicodeResult]:
        # it's a string
        return ''.join(xpath_results)
    else:
        # it's a element
        return html_to_text(xpath_results.text_content()).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def extract_url(xpath_results, search_url):
    """Extract an absolute, normalized URL from an xpath result.

    Protocol-relative ('//host/...') and site-relative ('/path') URLs are
    resolved against search_url before normalization.
    """
    url = extract_text(xpath_results)

    if url.startswith('//'):
        # add http or https to this kind of url //example.com/
        scheme = urlparse(search_url).scheme
        url = scheme + url
    elif url.startswith('/'):
        # fix relative url to the search engine
        url = urljoin(search_url, url)

    # normalize url
    return normalize_url(url)
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_url(url):
    """Validate a URL, ensure it has a path, and unwrap yahoo redirects."""
    parsed_url = urlparse(url)

    if not parsed_url.netloc:
        raise Exception('Cannot parse url')

    # add a / at this end of the url if there is no path
    if not parsed_url.path:
        url += '/'

    # FIXME : hack for yahoo
    is_yahoo_redirect = (parsed_url.hostname == 'search.yahoo.com'
                         and parsed_url.path.startswith('/r'))
    if is_yahoo_redirect:
        path = parsed_url.path
        mark = path.find('/**')
        if mark != -1:
            # the real target follows the '/**' marker, percent-encoded
            return unquote(path[mark + 3:]).decode('utf-8')

    return url
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Fill the engine's search_url template with the encoded query."""
    # urlencode yields 'q=<encoded>'; drop the leading 'q=' ([2:]) so
    # only the encoded query text remains
    query = urlencode({'q': query})[2:]

    format_args = {'query': query}
    if paging and search_url.find('{pageno}') >= 0:
        # translate searx's 1-based page number into the engine's own
        # page/offset convention
        format_args['pageno'] = (params['pageno'] + first_page_num - 1) * page_size

    params['url'] = search_url.format(**format_args)
    params['query'] = query

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Generic xpath-engine response parser.

    When results_xpath is configured, url/title/content are extracted
    per result node; otherwise the three independent xpath selections
    over the whole document are zipped together.
    """
    results = []
    dom = html.fromstring(resp.text)
    if results_xpath:
        for result in dom.xpath(results_xpath):
            url = extract_url(result.xpath(url_xpath), search_url)
            title = extract_text(result.xpath(title_xpath)[0])
            content = extract_text(result.xpath(content_xpath)[0])
            results.append({'url': url, 'title': title, 'content': content})
    else:
        for url, title, content in zip(
            (extract_url(x, search_url) for
             x in dom.xpath(url_xpath)),
            map(extract_text, dom.xpath(title_xpath)),
            map(extract_text, dom.xpath(content_xpath))
        ):
            results.append({'url': url, 'title': title, 'content': content})

    if not suggestion_xpath:
        return results
    for suggestion in dom.xpath(suggestion_xpath):
        results.append({'suggestion': extract_text(suggestion)})
    return results
|
|
|
@ -1,97 +0,0 @@
|
||||||
# Yacy (Web, Images, Videos, Music, Files)
|
|
||||||
#
|
|
||||||
# @website http://yacy.net
|
|
||||||
# @provide-api yes
|
|
||||||
# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
|
|
||||||
#
|
|
||||||
# @using-api yes
|
|
||||||
# @results JSON
|
|
||||||
# @stable yes
|
|
||||||
# @parse (general) url, title, content, publishedDate
|
|
||||||
# @parse (images) url, title, img_src
|
|
||||||
#
|
|
||||||
# @todo parse video, audio and file results
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
number_of_results = 5
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'http://localhost:8090'
|
|
||||||
search_url = '/yacysearch.json?{query}'\
|
|
||||||
'&startRecord={offset}'\
|
|
||||||
'&maximumRecords={limit}'\
|
|
||||||
'&contentdom={search_type}'\
|
|
||||||
'&resource=global'
|
|
||||||
|
|
||||||
# yacy specific type-definitions
|
|
||||||
search_types = {'general': 'text',
|
|
||||||
'images': 'image',
|
|
||||||
'files': 'app',
|
|
||||||
'music': 'audio',
|
|
||||||
'videos': 'video'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the yacysearch.json request for the selected category/page."""
    offset = (params['pageno'] - 1) * number_of_results
    # '0' serves as a harmless fallback contentdom for unknown categories
    search_type = search_types.get(params.get('category'), '0')

    query_part = search_url.format(query=urlencode({'query': query}),
                                   offset=offset,
                                   limit=number_of_results,
                                   search_type=search_type)
    params['url'] = base_url + query_part

    # add language tag if specified
    if params['language'] != 'all':
        params['url'] += '&lr=lang_' + params['language'].split('_')[0]

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse yacy's JSON channels/items payload into searx results."""
    raw_search_results = loads(resp.text)

    # return empty array if there are no results
    if not raw_search_results:
        return []

    search_results = raw_search_results.get('channels', [])
    if len(search_results) == 0:
        return []

    results = []
    for result in search_results[0].get('items', []):
        # parse image results
        if result.get('image'):
            # append result
            results.append({'url': result['url'],
                            'title': result['title'],
                            'content': '',
                            'img_src': result['image'],
                            'template': 'images.html'})
        # parse general results
        else:
            publishedDate = parser.parse(result['pubDate'])
            # append result
            results.append({'url': result['link'],
                            'title': result['title'],
                            'content': result['description'],
                            'publishedDate': publishedDate})

    # TODO parse video, audio and file results

    # return results
    return results
|
|
|
@ -1,113 +0,0 @@
|
||||||
"""
|
|
||||||
Yahoo (Web)
|
|
||||||
|
|
||||||
@website https://search.yahoo.com/web
|
|
||||||
@provide-api yes (https://developer.yahoo.com/boss/search/),
|
|
||||||
$0.80/1000 queries
|
|
||||||
|
|
||||||
@using-api no (because pricing)
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content, suggestion
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import unquote
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://search.yahoo.com/'
|
|
||||||
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
|
||||||
url_xpath = './/h3/a/@href'
|
|
||||||
title_xpath = './/h3/a'
|
|
||||||
content_xpath = './/div[@class="compText aAbs"]'
|
|
||||||
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
|
||||||
|
|
||||||
|
|
||||||
# remove yahoo-specific tracking-url
|
|
||||||
def parse_url(url_string):
    """Unwrap the target URL from a yahoo '/RU=...' redirect wrapper."""
    # the real destination starts at the first 'http' after the '/RU='
    # marker and is terminated by a '/RS' or '/RK' tracking suffix
    start = url_string.find('http', url_string.find('/RU=') + 1)

    endpositions = [pos for pos in (url_string.rfind(ending) for ending in ['/RS', '/RK'])
                    if pos > -1]

    if start == 0 or len(endpositions) == 0:
        # no redirect wrapper found: return the string unchanged
        return url_string
    else:
        return unquote(url_string[start:min(endpositions)])
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the yahoo search URL and language cookie."""
    # yahoo paginates with a 1-based record offset, ten results per page
    offset = (params['pageno'] - 1) * 10 + 1

    language = 'en' if params['language'] == 'all' else params['language'].split('_')[0]

    params['url'] = base_url + search_url.format(offset=offset,
                                                 query=urlencode({'p': query}),
                                                 lang=language)

    # TODO required?
    params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
        .format(lang=language)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse the yahoo SERP: result count, results and suggestions."""
    results = []

    dom = html.fromstring(resp.text)

    try:
        # total hit count is the last <span> of the pagination block
        results_num = int(dom.xpath('//div[@class="compPagination"]/span[last()]/text()')[0]
                          .split()[0].replace(',', ''))
        results.append({'number_of_results': results_num})
    except:
        # the count is optional; ignore pages without a pagination block
        pass

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            title = extract_text(result.xpath(title_xpath)[0])
        except:
            # skip results without a resolvable link/title
            continue

        content = extract_text(result.xpath(content_xpath)[0])

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content})

    # if no suggestion found, return results
    suggestions = dom.xpath(suggestion_xpath)
    if not suggestions:
        return results

    # parse suggestion
    for suggestion in suggestions:
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
|
|
|
@ -1,104 +0,0 @@
|
||||||
# Yahoo (News)
|
|
||||||
#
|
|
||||||
# @website https://news.yahoo.com
|
|
||||||
# @provide-api yes (https://developer.yahoo.com/boss/search/)
|
|
||||||
# $0.80/1000 queries
|
|
||||||
#
|
|
||||||
# @using-api no (because pricing)
|
|
||||||
# @results HTML (using search portal)
|
|
||||||
# @stable no (HTML can change)
|
|
||||||
# @parse url, title, content, publishedDate
|
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
|
||||||
from searx.engines.yahoo import parse_url
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
import re
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['news']
|
|
||||||
paging = True
|
|
||||||
language_support = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&{lang}=uh3_news_web_gs_1&pz=10&xargs=0&vl=lang_{lang}' # noqa
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = '//ol[contains(@class,"searchCenterMiddle")]//li'
|
|
||||||
url_xpath = './/h3/a/@href'
|
|
||||||
title_xpath = './/h3/a'
|
|
||||||
content_xpath = './/div[@class="compText"]'
|
|
||||||
publishedDate_xpath = './/span[contains(@class,"tri")]'
|
|
||||||
suggestion_xpath = '//div[contains(@class,"VerALSOTRY")]//a'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the yahoo news search URL and language cookie."""
    # yahoo paginates with a 1-based record offset, ten results per page
    offset = (params['pageno'] - 1) * 10 + 1

    language = 'en' if params['language'] == 'all' else params['language'].split('_')[0]

    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'p': query}),
                                      lang=language)

    # TODO required?
    params['cookies']['sB'] = '"v=1&vm=p&fl=1&vl=lang_{lang}&sh=1&pn=10&rw=new'\
        .format(lang=language)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_url(url):
    """Strip yahoo's ';_ylt=...' tracking suffix from yahoo URLs."""
    if ".yahoo.com/" not in url:
        # non-yahoo URLs pass through untouched
        return url
    return re.sub(u"\;\_ylt\=.+$", "", url)
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse the yahoo news SERP, including relative publication dates."""
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        urls = result.xpath(url_xpath)
        # exactly one link per result is expected; skip anything else
        if len(urls) != 1:
            continue
        url = sanitize_url(parse_url(extract_url(urls, search_url)))
        title = extract_text(result.xpath(title_xpath)[0])
        content = extract_text(result.xpath(content_xpath)[0])

        # parse publishedDate
        publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])

        # still useful ?
        # relative dates like '5 minutes ago' / '1 hour, 23 minutes ago'
        # are converted to absolute datetimes; everything else goes
        # through dateutil
        if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group()))  # noqa
        else:
            if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$",
                        publishedDate):
                timeNumbers = re.findall(r'\d+', publishedDate)
                publishedDate = datetime.now()\
                    - timedelta(hours=int(timeNumbers[0]))\
                    - timedelta(minutes=int(timeNumbers[1]))
            else:
                publishedDate = parser.parse(publishedDate)

        # dateutil defaults a missing year to 1900: assume current year
        if publishedDate.year == 1900:
            publishedDate = publishedDate.replace(year=datetime.now().year)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'publishedDate': publishedDate})

    # return results
    return results
|
|
|
@ -1,63 +0,0 @@
|
||||||
"""
|
|
||||||
Yandex (Web)
|
|
||||||
|
|
||||||
@website https://yandex.ru/
|
|
||||||
@provide-api ?
|
|
||||||
@using-api no
|
|
||||||
@results HTML (using search portal)
|
|
||||||
@stable no (HTML can change)
|
|
||||||
@parse url, title, content
|
|
||||||
"""
|
|
||||||
|
|
||||||
from cgi import escape
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
|
||||||
from searx.search import logger
|
|
||||||
|
|
||||||
logger = logger.getChild('yandex engine')
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['general']
|
|
||||||
paging = True
|
|
||||||
language_support = True # TODO
|
|
||||||
|
|
||||||
default_tld = 'com'
|
|
||||||
language_map = {'ru': 'ru',
|
|
||||||
'ua': 'uk',
|
|
||||||
'tr': 'com.tr'}
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://yandex.{tld}/'
|
|
||||||
search_url = 'search/?{query}&p={page}'
|
|
||||||
|
|
||||||
results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
|
|
||||||
url_xpath = './/h2/a/@href'
|
|
||||||
title_xpath = './/h2/a//text()'
|
|
||||||
content_xpath = './/div[@class="serp-item__text"]//text()'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Pick the yandex TLD for the UI language and build the search URL."""
    lang = params['language'].split('_')[0]
    # fall back to yandex.com when the language has no dedicated TLD
    tld = language_map.get(lang) or default_tld
    host = base_url.format(tld=tld)
    # yandex pages are 0-based; searx pages are 1-based
    page = params['pageno'] - 1
    params['url'] = host + search_url.format(page=page,
                                             query=urlencode({'text': query}))
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse yandex SERP HTML; log and skip items that fail to parse."""
    results = []
    dom = html.fromstring(resp.text)

    for item in dom.xpath(results_xpath):
        try:
            url = item.xpath(url_xpath)[0]
            title = escape(''.join(item.xpath(title_xpath)))
            content = escape(''.join(item.xpath(content_xpath)))
        except:
            # malformed result markup: log it and move on to the next item
            logger.exception('yandex parse crash')
            continue

        results.append({'url': url, 'title': title, 'content': content})

    return results
|
|
|
@ -1,83 +0,0 @@
|
||||||
# Youtube (Videos)
|
|
||||||
#
|
|
||||||
# @website https://www.youtube.com/
|
|
||||||
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
|
|
||||||
#
|
|
||||||
# @using-api yes
|
|
||||||
# @results JSON
|
|
||||||
# @stable yes
|
|
||||||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos', 'music']
|
|
||||||
paging = False
|
|
||||||
language_support = True
|
|
||||||
api_key = None
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.googleapis.com/youtube/v3/search'
|
|
||||||
search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}'
|
|
||||||
|
|
||||||
embedded_url = '<iframe width="540" height="304" ' +\
|
|
||||||
'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
|
|
||||||
'frameborder="0" allowfullscreen></iframe>'
|
|
||||||
|
|
||||||
base_youtube_url = 'https://www.youtube.com/watch?v='
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the YouTube Data API v3 search URL."""
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      api_key=api_key)

    # add language tag if specified
    if params['language'] != 'all':
        lang = params['language'].split('_')[0]
        params['url'] += '&relevanceLanguage=' + lang

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Convert YouTube Data API v3 JSON into searx video results.

    Removes the dead '' initializations of content/thumbnail that were
    unconditionally overwritten before use.
    """
    results = []

    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'items' not in search_results:
        return []

    # parse results
    for result in search_results['items']:
        videoid = result['id']['videoId']

        title = result['snippet']['title']

        pubdate = result['snippet']['publishedAt']
        publishedDate = parser.parse(pubdate)

        thumbnail = result['snippet']['thumbnails']['high']['url']
        content = result['snippet']['description']

        url = base_youtube_url + videoid
        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
    return results
|
|
|
@ -1,81 +0,0 @@
|
||||||
# Youtube (Videos)
|
|
||||||
#
|
|
||||||
# @website https://www.youtube.com/
|
|
||||||
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
|
|
||||||
#
|
|
||||||
# @using-api no
|
|
||||||
# @results HTML
|
|
||||||
# @stable no
|
|
||||||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
|
||||||
|
|
||||||
from urllib import quote_plus
|
|
||||||
from lxml import html
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from searx.utils import list_get
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos', 'music']
|
|
||||||
paging = True
|
|
||||||
language_support = False
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.youtube.com/results'
|
|
||||||
search_url = base_url + '?search_query={query}&page={page}'
|
|
||||||
|
|
||||||
embedded_url = '<iframe width="540" height="304" ' +\
|
|
||||||
'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
|
|
||||||
'frameborder="0" allowfullscreen></iframe>'
|
|
||||||
|
|
||||||
base_youtube_url = 'https://www.youtube.com/watch?v='
|
|
||||||
|
|
||||||
# specific xpath variables
|
|
||||||
results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]"
|
|
||||||
url_xpath = './/h3/a/@href'
|
|
||||||
title_xpath = './/div[@class="yt-lockup-content"]/h3/a'
|
|
||||||
content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]'
|
|
||||||
|
|
||||||
|
|
||||||
# returns extract_text on the first result selected by the xpath or None
|
|
||||||
def extract_text_from_dom(result, xpath):
|
|
||||||
r = result.xpath(xpath)
|
|
||||||
if len(r) > 0:
|
|
||||||
return extract_text(r[0])
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
|
|
||||||
params['url'] = search_url.format(query=quote_plus(query),
|
|
||||||
page=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
# parse results
|
|
||||||
for result in dom.xpath(results_xpath):
|
|
||||||
videoid = list_get(result.xpath('@data-context-item-id'), 0)
|
|
||||||
if videoid is not None:
|
|
||||||
url = base_youtube_url + videoid
|
|
||||||
thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
|
|
||||||
|
|
||||||
title = extract_text_from_dom(result, title_xpath) or videoid
|
|
||||||
content = extract_text_from_dom(result, content_xpath)
|
|
||||||
|
|
||||||
embedded = embedded_url.format(videoid=videoid)
|
|
||||||
|
|
||||||
# append result
|
|
||||||
results.append({'url': url,
|
|
||||||
'title': title,
|
|
||||||
'content': content,
|
|
||||||
'template': 'videos.html',
|
|
||||||
'embedded': embedded,
|
|
||||||
'thumbnail': thumbnail})
|
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
|
|
@ -1,78 +0,0 @@
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
|
|
||||||
# list of language codes
|
|
||||||
language_codes = (
|
|
||||||
("ar_XA", "Arabic", "Arabia"),
|
|
||||||
("bg_BG", "Bulgarian", "Bulgaria"),
|
|
||||||
("cs_CZ", "Czech", "Czech Republic"),
|
|
||||||
("da_DK", "Danish", "Denmark"),
|
|
||||||
("de_AT", "German", "Austria"),
|
|
||||||
("de_CH", "German", "Switzerland"),
|
|
||||||
("de_DE", "German", "Germany"),
|
|
||||||
("el_GR", "Greek", "Greece"),
|
|
||||||
("en_AU", "English", "Australia"),
|
|
||||||
("en_CA", "English", "Canada"),
|
|
||||||
("en_GB", "English", "United Kingdom"),
|
|
||||||
("en_ID", "English", "Indonesia"),
|
|
||||||
("en_IE", "English", "Ireland"),
|
|
||||||
("en_IN", "English", "India"),
|
|
||||||
("en_MY", "English", "Malaysia"),
|
|
||||||
("en_NZ", "English", "New Zealand"),
|
|
||||||
("en_PH", "English", "Philippines"),
|
|
||||||
("en_SG", "English", "Singapore"),
|
|
||||||
("en_US", "English", "United States"),
|
|
||||||
("en_XA", "English", "Arabia"),
|
|
||||||
("en_ZA", "English", "South Africa"),
|
|
||||||
("es_AR", "Spanish", "Argentina"),
|
|
||||||
("es_CL", "Spanish", "Chile"),
|
|
||||||
("es_ES", "Spanish", "Spain"),
|
|
||||||
("es_MX", "Spanish", "Mexico"),
|
|
||||||
("es_US", "Spanish", "United States"),
|
|
||||||
("es_XL", "Spanish", "Latin America"),
|
|
||||||
("et_EE", "Estonian", "Estonia"),
|
|
||||||
("fi_FI", "Finnish", "Finland"),
|
|
||||||
("fr_BE", "French", "Belgium"),
|
|
||||||
("fr_CA", "French", "Canada"),
|
|
||||||
("fr_CH", "French", "Switzerland"),
|
|
||||||
("fr_FR", "French", "France"),
|
|
||||||
("he_IL", "Hebrew", "Israel"),
|
|
||||||
("hr_HR", "Croatian", "Croatia"),
|
|
||||||
("hu_HU", "Hungarian", "Hungary"),
|
|
||||||
("it_IT", "Italian", "Italy"),
|
|
||||||
("ja_JP", "Japanese", "Japan"),
|
|
||||||
("ko_KR", "Korean", "Korea"),
|
|
||||||
("lt_LT", "Lithuanian", "Lithuania"),
|
|
||||||
("lv_LV", "Latvian", "Latvia"),
|
|
||||||
("nb_NO", "Norwegian", "Norway"),
|
|
||||||
("nl_BE", "Dutch", "Belgium"),
|
|
||||||
("nl_NL", "Dutch", "Netherlands"),
|
|
||||||
("oc_OC", "Occitan", "Occitan"),
|
|
||||||
("pl_PL", "Polish", "Poland"),
|
|
||||||
("pt_BR", "Portuguese", "Brazil"),
|
|
||||||
("pt_PT", "Portuguese", "Portugal"),
|
|
||||||
("ro_RO", "Romanian", "Romania"),
|
|
||||||
("ru_RU", "Russian", "Russia"),
|
|
||||||
("sk_SK", "Slovak", "Slovak Republic"),
|
|
||||||
("sl_SL", "Slovenian", "Slovenia"),
|
|
||||||
("sv_SE", "Swedish", "Sweden"),
|
|
||||||
("th_TH", "Thai", "Thailand"),
|
|
||||||
("tr_TR", "Turkish", "Turkey"),
|
|
||||||
("uk_UA", "Ukrainian", "Ukraine"),
|
|
||||||
("zh_CN", "Chinese", "China"),
|
|
||||||
("zh_HK", "Chinese", "Hong Kong SAR"),
|
|
||||||
("zh_TW", "Chinese", "Taiwan"))
|
|
|
@ -1,81 +0,0 @@
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
from sys import exit
|
|
||||||
from searx import logger
|
|
||||||
|
|
||||||
logger = logger.getChild('plugins')
|
|
||||||
|
|
||||||
from searx.plugins import (https_rewrite,
|
|
||||||
open_results_on_new_tab,
|
|
||||||
self_info,
|
|
||||||
search_on_category_select,
|
|
||||||
tracker_url_remover,
|
|
||||||
vim_hotkeys)
|
|
||||||
|
|
||||||
required_attrs = (('name', str),
|
|
||||||
('description', str),
|
|
||||||
('default_on', bool))
|
|
||||||
|
|
||||||
optional_attrs = (('js_dependencies', tuple),
|
|
||||||
('css_dependencies', tuple))
|
|
||||||
|
|
||||||
|
|
||||||
class Plugin():
|
|
||||||
default_on = False
|
|
||||||
name = 'Default plugin'
|
|
||||||
description = 'Default plugin description'
|
|
||||||
|
|
||||||
|
|
||||||
class PluginStore():
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.plugins = []
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
for plugin in self.plugins:
|
|
||||||
yield plugin
|
|
||||||
|
|
||||||
def register(self, *plugins):
|
|
||||||
for plugin in plugins:
|
|
||||||
for plugin_attr, plugin_attr_type in required_attrs:
|
|
||||||
if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type):
|
|
||||||
logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin))
|
|
||||||
exit(3)
|
|
||||||
for plugin_attr, plugin_attr_type in optional_attrs:
|
|
||||||
if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type):
|
|
||||||
setattr(plugin, plugin_attr, plugin_attr_type())
|
|
||||||
plugin.id = plugin.name.replace(' ', '_')
|
|
||||||
self.plugins.append(plugin)
|
|
||||||
|
|
||||||
def call(self, plugin_type, request, *args, **kwargs):
|
|
||||||
ret = True
|
|
||||||
for plugin in request.user_plugins:
|
|
||||||
if hasattr(plugin, plugin_type):
|
|
||||||
ret = getattr(plugin, plugin_type)(request, *args, **kwargs)
|
|
||||||
if not ret:
|
|
||||||
break
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
plugins = PluginStore()
|
|
||||||
plugins.register(https_rewrite)
|
|
||||||
plugins.register(open_results_on_new_tab)
|
|
||||||
plugins.register(self_info)
|
|
||||||
plugins.register(search_on_category_select)
|
|
||||||
plugins.register(tracker_url_remover)
|
|
||||||
plugins.register(vim_hotkeys)
|
|
|
@ -1,230 +0,0 @@
|
||||||
'''
|
|
||||||
searx is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
from urlparse import urlparse
|
|
||||||
from lxml import etree
|
|
||||||
from os import listdir, environ
|
|
||||||
from os.path import isfile, isdir, join
|
|
||||||
from searx.plugins import logger
|
|
||||||
from flask.ext.babel import gettext
|
|
||||||
from searx import searx_dir
|
|
||||||
|
|
||||||
|
|
||||||
name = "HTTPS rewrite"
|
|
||||||
description = gettext('Rewrite HTTP links to HTTPS if possible')
|
|
||||||
default_on = True
|
|
||||||
|
|
||||||
if 'SEARX_HTTPS_REWRITE_PATH' in environ:
|
|
||||||
rules_path = environ['SEARX_rules_path']
|
|
||||||
else:
|
|
||||||
rules_path = join(searx_dir, 'plugins/https_rules')
|
|
||||||
|
|
||||||
logger = logger.getChild("https_rewrite")
|
|
||||||
|
|
||||||
# https://gitweb.torproject.org/\
|
|
||||||
# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
|
|
||||||
|
|
||||||
# HTTPS rewrite rules
|
|
||||||
https_rules = []
|
|
||||||
|
|
||||||
|
|
||||||
# load single ruleset from a xml file
|
|
||||||
def load_single_https_ruleset(rules_path):
|
|
||||||
ruleset = ()
|
|
||||||
|
|
||||||
# init parser
|
|
||||||
parser = etree.XMLParser()
|
|
||||||
|
|
||||||
# load and parse xml-file
|
|
||||||
try:
|
|
||||||
tree = etree.parse(rules_path, parser)
|
|
||||||
except:
|
|
||||||
# TODO, error message
|
|
||||||
return ()
|
|
||||||
|
|
||||||
# get root node
|
|
||||||
root = tree.getroot()
|
|
||||||
|
|
||||||
# check if root is a node with the name ruleset
|
|
||||||
# TODO improve parsing
|
|
||||||
if root.tag != 'ruleset':
|
|
||||||
return ()
|
|
||||||
|
|
||||||
# check if rule is deactivated by default
|
|
||||||
if root.attrib.get('default_off'):
|
|
||||||
return ()
|
|
||||||
|
|
||||||
# check if rule does only work for specific platforms
|
|
||||||
if root.attrib.get('platform'):
|
|
||||||
return ()
|
|
||||||
|
|
||||||
hosts = []
|
|
||||||
rules = []
|
|
||||||
exclusions = []
|
|
||||||
|
|
||||||
# parse childs from ruleset
|
|
||||||
for ruleset in root:
|
|
||||||
# this child define a target
|
|
||||||
if ruleset.tag == 'target':
|
|
||||||
# check if required tags available
|
|
||||||
if not ruleset.attrib.get('host'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# convert host-rule to valid regex
|
|
||||||
host = ruleset.attrib.get('host')\
|
|
||||||
.replace('.', '\.').replace('*', '.*')
|
|
||||||
|
|
||||||
# append to host list
|
|
||||||
hosts.append(host)
|
|
||||||
|
|
||||||
# this child define a rule
|
|
||||||
elif ruleset.tag == 'rule':
|
|
||||||
# check if required tags available
|
|
||||||
if not ruleset.attrib.get('from')\
|
|
||||||
or not ruleset.attrib.get('to'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# TODO hack, which convert a javascript regex group
|
|
||||||
# into a valid python regex group
|
|
||||||
rule_from = ruleset.attrib['from'].replace('$', '\\')
|
|
||||||
if rule_from.endswith('\\'):
|
|
||||||
rule_from = rule_from[:-1] + '$'
|
|
||||||
rule_to = ruleset.attrib['to'].replace('$', '\\')
|
|
||||||
if rule_to.endswith('\\'):
|
|
||||||
rule_to = rule_to[:-1] + '$'
|
|
||||||
|
|
||||||
# TODO, not working yet because of the hack above,
|
|
||||||
# currently doing that in webapp.py
|
|
||||||
# rule_from_rgx = re.compile(rule_from, re.I)
|
|
||||||
|
|
||||||
# append rule
|
|
||||||
try:
|
|
||||||
rules.append((re.compile(rule_from, re.I | re.U), rule_to))
|
|
||||||
except:
|
|
||||||
# TODO log regex error
|
|
||||||
continue
|
|
||||||
|
|
||||||
# this child define an exclusion
|
|
||||||
elif ruleset.tag == 'exclusion':
|
|
||||||
# check if required tags available
|
|
||||||
if not ruleset.attrib.get('pattern'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
exclusion_rgx = re.compile(ruleset.attrib.get('pattern'))
|
|
||||||
|
|
||||||
# append exclusion
|
|
||||||
exclusions.append(exclusion_rgx)
|
|
||||||
|
|
||||||
# convert list of possible hosts to a simple regex
|
|
||||||
# TODO compress regex to improve performance
|
|
||||||
try:
|
|
||||||
target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U)
|
|
||||||
except:
|
|
||||||
return ()
|
|
||||||
|
|
||||||
# return ruleset
|
|
||||||
return (target_hosts, rules, exclusions)
|
|
||||||
|
|
||||||
|
|
||||||
# load all https rewrite rules
|
|
||||||
def load_https_rules(rules_path):
|
|
||||||
# check if directory exists
|
|
||||||
if not isdir(rules_path):
|
|
||||||
logger.error("directory not found: '" + rules_path + "'")
|
|
||||||
return
|
|
||||||
|
|
||||||
# search all xml files which are stored in the https rule directory
|
|
||||||
xml_files = [join(rules_path, f)
|
|
||||||
for f in listdir(rules_path)
|
|
||||||
if isfile(join(rules_path, f)) and f[-4:] == '.xml']
|
|
||||||
|
|
||||||
# load xml-files
|
|
||||||
for ruleset_file in xml_files:
|
|
||||||
# calculate rewrite-rules
|
|
||||||
ruleset = load_single_https_ruleset(ruleset_file)
|
|
||||||
|
|
||||||
# skip if no ruleset returned
|
|
||||||
if not ruleset:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# append ruleset
|
|
||||||
https_rules.append(ruleset)
|
|
||||||
|
|
||||||
logger.info('{n} rules loaded'.format(n=len(https_rules)))
|
|
||||||
|
|
||||||
|
|
||||||
def https_url_rewrite(result):
|
|
||||||
skip_https_rewrite = False
|
|
||||||
# check if HTTPS rewrite is possible
|
|
||||||
for target, rules, exclusions in https_rules:
|
|
||||||
|
|
||||||
# check if target regex match with url
|
|
||||||
if target.match(result['parsed_url'].netloc):
|
|
||||||
# process exclusions
|
|
||||||
for exclusion in exclusions:
|
|
||||||
# check if exclusion match with url
|
|
||||||
if exclusion.match(result['url']):
|
|
||||||
skip_https_rewrite = True
|
|
||||||
break
|
|
||||||
|
|
||||||
# skip https rewrite if required
|
|
||||||
if skip_https_rewrite:
|
|
||||||
break
|
|
||||||
|
|
||||||
# process rules
|
|
||||||
for rule in rules:
|
|
||||||
try:
|
|
||||||
new_result_url = rule[0].sub(rule[1], result['url'])
|
|
||||||
except:
|
|
||||||
break
|
|
||||||
|
|
||||||
# parse new url
|
|
||||||
new_parsed_url = urlparse(new_result_url)
|
|
||||||
|
|
||||||
# continiue if nothing was rewritten
|
|
||||||
if result['url'] == new_result_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# get domainname from result
|
|
||||||
# TODO, does only work correct with TLD's like
|
|
||||||
# asdf.com, not for asdf.com.de
|
|
||||||
# TODO, using publicsuffix instead of this rewrite rule
|
|
||||||
old_result_domainname = '.'.join(
|
|
||||||
result['parsed_url'].hostname.split('.')[-2:])
|
|
||||||
new_result_domainname = '.'.join(
|
|
||||||
new_parsed_url.hostname.split('.')[-2:])
|
|
||||||
|
|
||||||
# check if rewritten hostname is the same,
|
|
||||||
# to protect against wrong or malicious rewrite rules
|
|
||||||
if old_result_domainname == new_result_domainname:
|
|
||||||
# set new url
|
|
||||||
result['url'] = new_result_url
|
|
||||||
|
|
||||||
# target has matched, do not search over the other rules
|
|
||||||
break
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def on_result(request, ctx):
|
|
||||||
result = ctx['result']
|
|
||||||
if result['parsed_url'].scheme == 'http':
|
|
||||||
https_url_rewrite(result)
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
load_https_rules(rules_path)
|
|
|
@ -1,17 +0,0 @@
|
||||||
<!--
|
|
||||||
This directory contains web site rewriting rules for the
|
|
||||||
HTTPS Everywhere software, available from
|
|
||||||
https://www.eff.org/https-everywhere
|
|
||||||
|
|
||||||
These rules were contributed to the project by users and aim to
|
|
||||||
enable routine secure access to as many different web sites as
|
|
||||||
possible. They are automatically installed together with the
|
|
||||||
HTTPS Everywhere software. The presence of these rules does not
|
|
||||||
mean that an HTTPS Everywhere user accessed, or intended to
|
|
||||||
access, any particular web site.
|
|
||||||
|
|
||||||
For information about how to create additional HTTPS Everywhere
|
|
||||||
rewriting rules to add support for new sites, please see
|
|
||||||
|
|
||||||
https://www.eff.org/https-everywhere/rulesets
|
|
||||||
-->
|
|
|
@ -1,56 +0,0 @@
|
||||||
<!--
|
|
||||||
For other Microsoft coverage, see Microsoft.xml.
|
|
||||||
|
|
||||||
|
|
||||||
CDN buckets:
|
|
||||||
|
|
||||||
- a134.lm.akamai.net
|
|
||||||
|
|
||||||
- akam.bing.com
|
|
||||||
- *.mm.bing.net
|
|
||||||
|
|
||||||
|
|
||||||
Nonfunctional domains:
|
|
||||||
|
|
||||||
- m2.cn.bing.com
|
|
||||||
- origin.bj1.bing.com
|
|
||||||
- blogs.bing.com
|
|
||||||
|
|
||||||
|
|
||||||
Fully covered domains:
|
|
||||||
|
|
||||||
- bing.com subdomains:
|
|
||||||
|
|
||||||
- (www.)
|
|
||||||
- c.bing (tracking beacons)
|
|
||||||
- cn.bing
|
|
||||||
- h.bing
|
|
||||||
- ssl
|
|
||||||
- testfamilysafety.bing
|
|
||||||
- udc.bing
|
|
||||||
- (www.)bing
|
|
||||||
|
|
||||||
- *.mm.bing.net
|
|
||||||
- api.bing.com
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Bing">
|
|
||||||
|
|
||||||
<target host="bing.com" />
|
|
||||||
<target host="*.bing.com" />
|
|
||||||
<target host="*.mm.bing.net" />
|
|
||||||
|
|
||||||
|
|
||||||
<securecookie host=".*\.bing\.com$" name=".+" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://((?:c|cn|h|ssl|testfamilysafety|udc|www)\.)?bing\.com/"
|
|
||||||
to="https://$1bing.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://([^/:@]*)\.mm\.bing\.net/"
|
|
||||||
to="https://$1.mm.bing.com/"/>
|
|
||||||
|
|
||||||
<rule from="^http://([^/:@]*)\.api\.bing\.net/"
|
|
||||||
to="https://$1.api.bing.com/"/>
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,69 +0,0 @@
|
||||||
<!--
|
|
||||||
Nonfunctional domains:
|
|
||||||
|
|
||||||
- blog.dailymotion.com
|
|
||||||
- press.dailymotion.com (shows steaw.com, CN: www.steaw.com)
|
|
||||||
- proxy-46.dailymotion.com
|
|
||||||
- publicite.dailymotion.com
|
|
||||||
- publisher.dailymotion.com (reset)
|
|
||||||
- vid.ak.dmcdn.net (403, Akamai)
|
|
||||||
- vid2.ak.dmcdn.net (504, akamai)
|
|
||||||
|
|
||||||
|
|
||||||
Problematic domains:
|
|
||||||
|
|
||||||
- ak2.static.dailymotion.com (mismatched, CN: *.dmcdn.net)
|
|
||||||
- support.dmcloud.net (mismatched, CN: *.zendesk.com)
|
|
||||||
|
|
||||||
|
|
||||||
Partially covered domains:
|
|
||||||
|
|
||||||
- (www.)dailymotion.com
|
|
||||||
|
|
||||||
- cdn/manifest/video/\w+.mnft 403s
|
|
||||||
- crossdomain.xml breaks videos
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Dailymotion (default off)" default_off="breaks some embedded videos">
|
|
||||||
|
|
||||||
<target host="dailymotion.com" />
|
|
||||||
<!--
|
|
||||||
* for cross-domain cookie.
|
|
||||||
-->
|
|
||||||
<target host="*.dailymotion.com" />
|
|
||||||
<!--
|
|
||||||
https://mail1.eff.org/pipermail/https-everywhere-rules/2012-July/001241.html
|
|
||||||
-->
|
|
||||||
<exclusion pattern="^http://(?:www\.)?dailymotion\.com/(?:cdn/[\w-]+/video/|crossdomain\.xml$)" />
|
|
||||||
<target host="ak2.static.dailymotion.com" />
|
|
||||||
<target host="*.dmcdn.net" />
|
|
||||||
<target host="dmcloud.net" />
|
|
||||||
<target host="*.dmcloud.net" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Testing wrt embedded breakage.
|
|
||||||
|
|
||||||
securecookie host="^.*\.dailymotion\.com$" name=".+" /-->
|
|
||||||
<!--
|
|
||||||
Omniture tracking cookies:
|
|
||||||
-->
|
|
||||||
<securecookie host="^\.dailymotion\.com$" name="^s_\w+$" />
|
|
||||||
<securecookie host="^www\.dailymotion\.com$" name=".+" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://(erroracct\.|www\.)?dailymotion\.com/"
|
|
||||||
to="https://$1dailymotion.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(s\d|static(?:\d|s\d-ssl))\.dmcdn\.net/"
|
|
||||||
to="https://$1.dmcdn.net/" />
|
|
||||||
|
|
||||||
<rule from="^https?://ak2\.static\.dailymotion\.com/"
|
|
||||||
to="https://static1-ssl.dmcdn.net/" />
|
|
||||||
|
|
||||||
<rule from="^http://(s\.|www\.)?dmcloud\.net/"
|
|
||||||
to="https://$1dmcloud.net/" />
|
|
||||||
|
|
||||||
<rule from="^https?://support\.dmcloud\.net/"
|
|
||||||
to="https://dmcloud.zendesk.com/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,53 +0,0 @@
|
||||||
<!--
|
|
||||||
For problematic rules, see Deviantart-mismatches.xml.
|
|
||||||
|
|
||||||
|
|
||||||
Other deviantArt rulesets:
|
|
||||||
|
|
||||||
- Sta.sh.xml
|
|
||||||
|
|
||||||
|
|
||||||
ToDo: Find edgecast URL for /(fc|th)\d+.
|
|
||||||
|
|
||||||
|
|
||||||
Mixed content:
|
|
||||||
|
|
||||||
- Images on *.....com from e.deviantart.net *
|
|
||||||
|
|
||||||
* Secured by us
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="DeviantArt (pending)" default_off="site operator says not ready yet">
|
|
||||||
|
|
||||||
<target host="deviantart.com" />
|
|
||||||
<target host="*.deviantart.com" />
|
|
||||||
<target host="deviantart.net" />
|
|
||||||
<target host="*.deviantart.net" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Not secured by server:
|
|
||||||
-->
|
|
||||||
<!--securecookie host="^\.deviantart\.com$" name="^userinfo$" /-->
|
|
||||||
|
|
||||||
<securecookie host="^\.deviantart\.com$" name=".*" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Redirects from com to net, but does so successfully by itself.
|
|
||||||
-->
|
|
||||||
<rule from="^http://([aei]|fc\d\d|s[ht]|th\d\d)\.deviantart\.(com|net)/"
|
|
||||||
to="https://$1.deviantart.$2/" />
|
|
||||||
|
|
||||||
<!-- This handles everything that isn't in the first rule.
|
|
||||||
Namely, usernames, backend, fc, th, and (www.).
|
|
||||||
These domains present a cert that is only
|
|
||||||
valid for .com.
|
|
||||||
Note that .net isn't used on DA, but.net does
|
|
||||||
redirect to .com, and we shouldn't break what would
|
|
||||||
otherwise work.
|
|
||||||
Mustn't rewrite from https here, as doing so
|
|
||||||
would conflict with the first rule.
|
|
||||||
-->
|
|
||||||
<rule from="^http://([^/:@\.]+\.)?deviantart\.(?:com|net)/"
|
|
||||||
to="https://$1deviantart.com/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,38 +0,0 @@
|
||||||
<!--
|
|
||||||
Problematic domains:
|
|
||||||
|
|
||||||
- www.dukgo.com (mismatched, CN: dukgo.com)
|
|
||||||
|
|
||||||
|
|
||||||
Fully covered domains:
|
|
||||||
|
|
||||||
- (www.)dukgo.com (www → ^)
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="DuckDuckGo">
|
|
||||||
<target host="duckduckgo.com" />
|
|
||||||
<target host="*.duckduckgo.com" />
|
|
||||||
<target host="ddg.gg" />
|
|
||||||
<target host="duck.co" />
|
|
||||||
<target host="i.duck.co" />
|
|
||||||
<target host="dukgo.com" />
|
|
||||||
<target host="www.dukgo.com" />
|
|
||||||
|
|
||||||
<exclusion pattern="^http://(help|meme)\.duckduckgo\.com/" />
|
|
||||||
|
|
||||||
<securecookie host="^duck\.co$" name=".*"/>
|
|
||||||
|
|
||||||
<rule from="^http://duckduckgo\.com/" to="https://duckduckgo.com/"/>
|
|
||||||
<rule from="^http://([^/:@\.]+)\.duckduckgo\.com/" to="https://$1.duckduckgo.com/"/>
|
|
||||||
<!-- TODO: What does ddg.gg/foo do? Runs query foo, redirects to homepage, or error? -->
|
|
||||||
<rule from="^http://ddg\.gg/$" to="https://duckduckgo.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://duck\.co/" to="https://duck.co/" />
|
|
||||||
|
|
||||||
<rule from="^http://i\.duck\.co/"
|
|
||||||
to="https://duckduckgo.com/"/>
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?dukgo\.com/"
|
|
||||||
to="https://dukgo.com/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,44 +0,0 @@
|
||||||
<!--
|
|
||||||
For other Yahoo coverage, see Yahoo.xml.
|
|
||||||
|
|
||||||
|
|
||||||
These altnames don't exist:
|
|
||||||
|
|
||||||
- www.blog.flickr.net
|
|
||||||
- www.code.flickr.net
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Flickr">
|
|
||||||
|
|
||||||
<target host="flic.kr" />
|
|
||||||
<target host="*.flic.kr" />
|
|
||||||
<target host="flickr.com" />
|
|
||||||
<target host="*.flickr.com" />
|
|
||||||
<target host="*.flickr.net" />
|
|
||||||
<target host="*.staticflickr.com" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Not secured by server:
|
|
||||||
-->
|
|
||||||
<!--securecookie host="^\.flic\.kr$" name="^BX$" /-->
|
|
||||||
|
|
||||||
<securecookie host="^\.flic\.kr$" name=".+" />
|
|
||||||
<securecookie host=".*\.flickr\.com$" name=".+" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://flic\.kr/"
|
|
||||||
to="https://flic.kr/" />
|
|
||||||
|
|
||||||
<rule from="^http://(api\.|www\.)?flickr\.com/"
|
|
||||||
to="https://$1flickr.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://s(ecure|tatic)\.flickr\.com/"
|
|
||||||
to="https://s$1.flickr.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(c2|farm\d+)\.static(\.)?flickr\.com/"
|
|
||||||
to="https://$1.static$2flickr.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(blog|code)\.flickr\.net/"
|
|
||||||
to="https://$1.flickr.net/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,11 +0,0 @@
|
||||||
<!--
|
|
||||||
For other GitHub coverage, see Github.xml.
|
|
||||||
-->
|
|
||||||
<ruleset name="GitHub Pages">
|
|
||||||
|
|
||||||
<target host="*.github.io" />
|
|
||||||
|
|
||||||
<rule from="^http://([^/@:\.]+)\.github\.io/"
|
|
||||||
to="https://$1.github.io/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,94 +0,0 @@
|
||||||
<!--
|
|
||||||
Other GitHub rulesets:
|
|
||||||
|
|
||||||
- Github-Pages.xml
|
|
||||||
- Guag.es.xml
|
|
||||||
- Speaker_Deck.com.xml
|
|
||||||
|
|
||||||
|
|
||||||
CDN buckets:
|
|
||||||
|
|
||||||
- github-images.s3.amazonaws.com
|
|
||||||
- github.global.ssl.fastly.net
|
|
||||||
- a248.e.akamai.net/assets.github.com/
|
|
||||||
- a248.e.akamai.net/camo.github.com/
|
|
||||||
- s3.amazonaws.com/github/ | d24z2fz21y4fag.cloudfront.net
|
|
||||||
- github.myshopify.com
|
|
||||||
|
|
||||||
|
|
||||||
Fully covered domains:
|
|
||||||
|
|
||||||
- github.com subdomains:
|
|
||||||
|
|
||||||
- (www.)
|
|
||||||
- assets\d+
|
|
||||||
- assets-cdn
|
|
||||||
- bounty
|
|
||||||
- cloud
|
|
||||||
- f.cloud
|
|
||||||
- codeload
|
|
||||||
- developer
|
|
||||||
- eclipse
|
|
||||||
- enterprise
|
|
||||||
- gist
|
|
||||||
- gist-assets
|
|
||||||
- help
|
|
||||||
- identicons
|
|
||||||
- jobs
|
|
||||||
- mac
|
|
||||||
- mobile
|
|
||||||
- nodeload
|
|
||||||
- octodex
|
|
||||||
- pages
|
|
||||||
- raw
|
|
||||||
- rg3
|
|
||||||
- shop
|
|
||||||
- status
|
|
||||||
- support
|
|
||||||
- training
|
|
||||||
- try
|
|
||||||
- wiki
|
|
||||||
- windows
|
|
||||||
|
|
||||||
- collector.githubapp.com
|
|
||||||
|
|
||||||
- githubusercontent.com
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="GitHub">
|
|
||||||
|
|
||||||
<target host="github.com" />
|
|
||||||
<target host="*.github.com" />
|
|
||||||
<target host="github.io" />
|
|
||||||
<target host="*.githubusercontent.com" />
|
|
||||||
<target host="collector.githubapp.com" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Secured by server:
|
|
||||||
-->
|
|
||||||
<!--securecookie host="^github\.com$" name="^(_gh_sess|tz|user_session)$" /-->
|
|
||||||
<!--securecookie host="^\.github\.com$" name="^(dotcom_user|logged_in)$" /-->
|
|
||||||
<!--securecookie host="^enterprise\.github\.com$" name="^(_enterprise_web|request_method)$" /-->
|
|
||||||
<!--securecookie host="^gist\.github\.com$" name="^_gist_session$" /-->
|
|
||||||
<!--securecookie host="^help\.github\.com$" name="^_help_session$" /-->
|
|
||||||
<!--
|
|
||||||
Not secured by server:
|
|
||||||
-->
|
|
||||||
<!--securecookie host="^status\.github\.com$" name="^rack\.session$" /-->
|
|
||||||
|
|
||||||
<securecookie host="^(?:.*\.)?github\.com$" name=".+" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://((?:assets\d+|assets-cdn|bounty|cloud|f\.cloud|codeload|developer|eclipse|enterprise|gist|gist-assets|help|identicons|jobs|mac|mobile|nodeload|octodex|pages|raw|rg3|shop|status|support|training|try|wiki|windows|www)\.)?github\.com/"
|
|
||||||
to="https://$1github.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://collector\.githubapp\.com/"
|
|
||||||
to="https://collector.githubapp.com/" />
|
|
||||||
|
|
||||||
<rule from="^https?://github\.io/"
|
|
||||||
to="https://pages.github.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://([^/@:\.]+)\.githubusercontent\.com/"
|
|
||||||
to="https://$1.githubusercontent.com/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,26 +0,0 @@
|
||||||
<!--
|
|
||||||
|
|
||||||
Problematic domains:
|
|
||||||
|
|
||||||
- (www.)apture.com (works, mismatched, CN: *.google.com)
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Google (mismatches)" default_off="mismatches">
|
|
||||||
|
|
||||||
<!-- Akamai -->
|
|
||||||
<target host="js.admeld.com"/>
|
|
||||||
<target host="apture.com" />
|
|
||||||
<target host="www.apture.com" />
|
|
||||||
<target host="googleartproject.com"/>
|
|
||||||
<target host="www.googleartproject.com"/>
|
|
||||||
|
|
||||||
<rule from="^http://js\.admeld\.com/"
|
|
||||||
to="https://js.admeld.com/"/>
|
|
||||||
|
|
||||||
<rule from="^https?://(?:www\.)?apture\.com/"
|
|
||||||
to="https://apture.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?googleartproject\.com/"
|
|
||||||
to="https://www.googleartproject.com/"/>
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,14 +0,0 @@
|
||||||
<!--
|
|
||||||
For other Google coverage, see GoogleServices.xml.
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Google.org">
|
|
||||||
|
|
||||||
<target host="google.org" />
|
|
||||||
<target host="www.google.org" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://(www\.)?google\.org/"
|
|
||||||
to="https://$1google.org/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,143 +0,0 @@
|
||||||
<!--
|
|
||||||
For other Google coverage, see GoogleServices.xml.
|
|
||||||
|
|
||||||
|
|
||||||
Nonfunctional domains:
|
|
||||||
|
|
||||||
- hosted.gmodules.com *
|
|
||||||
- img0.gmodules.com *
|
|
||||||
- p.gmodules.com *
|
|
||||||
|
|
||||||
* 404; mismatched, CN: *.googleusercontent.com
|
|
||||||
|
|
||||||
|
|
||||||
Problematic domains:
|
|
||||||
|
|
||||||
- gmodules.com (503, CN: www.google.com)
|
|
||||||
- www.gmodules.com (503, CN: *.googleusercontent.com)
|
|
||||||
- gstatic.com (404, valid cert)
|
|
||||||
- api.recaptcha.net (works; mismatched, CN: google.com)
|
|
||||||
|
|
||||||
|
|
||||||
Partially covered domains:
|
|
||||||
|
|
||||||
- (www.)gmodules.com (→ www.google.com)
|
|
||||||
- (www.)google.com
|
|
||||||
- chart.apis.google.com (→ chart.googleapis.com)
|
|
||||||
|
|
||||||
|
|
||||||
Fully covered domains:
|
|
||||||
|
|
||||||
- api.google.com
|
|
||||||
|
|
||||||
- *.clients.google.com:
|
|
||||||
|
|
||||||
- linkhelp
|
|
||||||
|
|
||||||
- ssl.google-analytics.com
|
|
||||||
- www.google-analytics.com
|
|
||||||
|
|
||||||
- googleapis.com subdomains:
|
|
||||||
|
|
||||||
- ajax
|
|
||||||
- chart
|
|
||||||
- *.commondatastorage
|
|
||||||
- fonts
|
|
||||||
- *.storage
|
|
||||||
- www
|
|
||||||
|
|
||||||
- gstatic.com subdomains:
|
|
||||||
|
|
||||||
- (www.) (^ → www)
|
|
||||||
- csi
|
|
||||||
- encrypted-tbn\d
|
|
||||||
- g0
|
|
||||||
- *.metric
|
|
||||||
- ssl
|
|
||||||
- t\d
|
|
||||||
|
|
||||||
- api.recaptcha.net (→ www.google.com)
|
|
||||||
- api-secure.recaptcha.net
|
|
||||||
- gdata.youtube.com
|
|
||||||
|
|
||||||
|
|
||||||
ssl.google-analytics.com/ga.js sets __utm\w wildcard
|
|
||||||
cookies on whichever domain it is loaded from.
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Google APIs">
|
|
||||||
|
|
||||||
<target host="gmodules.com" />
|
|
||||||
<target host="www.gmodules.com" />
|
|
||||||
<target host="google.com" />
|
|
||||||
<target host="apis.google.com" />
|
|
||||||
<target host="*.apis.google.com" />
|
|
||||||
<target host="*.clients.google.com" />
|
|
||||||
<target host="www.google.com" />
|
|
||||||
<target host="*.google-analytics.com" />
|
|
||||||
<target host="*.googleapis.com" />
|
|
||||||
<target host="gstatic.com" />
|
|
||||||
<target host="*.gstatic.com" />
|
|
||||||
<!-- Captive portal detection redirects to this URL, and many captive
|
|
||||||
portals break TLS, so exempt this redirect URL.
|
|
||||||
See GitHub bug #368
|
|
||||||
-->
|
|
||||||
<exclusion pattern="^http://www\.gstatic\.com/generate_204" />
|
|
||||||
<target host="*.recaptcha.net" />
|
|
||||||
<target host="gdata.youtube.com" />
|
|
||||||
<exclusion pattern="^http://gdata\.youtube\.com/crossdomain\.xml" />
|
|
||||||
|
|
||||||
|
|
||||||
<securecookie host="^ssl\.google-analytics\.com$" name=".+" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?gmodules\.com/ig/images/"
|
|
||||||
to="https://www.google.com/ig/images/" />
|
|
||||||
|
|
||||||
<!-- jsapi was causing problems on some sites that embed google maps:
|
|
||||||
https://trac.torproject.org/projects/tor/ticket/2335
|
|
||||||
Apparently now fixed; thanks, Google!
|
|
||||||
-->
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/(afsonline/|chart|jsapi|recaptcha/|uds)"
|
|
||||||
to="https://www.google.com/$1" />
|
|
||||||
|
|
||||||
<rule from="^http://(api|[\w-]+\.client)s\.google\.com/"
|
|
||||||
to="https://$1s.google.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://chart\.apis\.google\.com/chart"
|
|
||||||
to="https://chart.googleapis.com/chart" />
|
|
||||||
|
|
||||||
<rule from="^http://(ssl|www)\.google-analytics\.com/"
|
|
||||||
to="https://$1.google-analytics.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(ajax|chart|fonts|www)\.googleapis\.com/"
|
|
||||||
to="https://$1.googleapis.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://([^@:\./]+\.)?(commondata)?storage\.googleapis\.com/"
|
|
||||||
to="https://$1$2storage.googleapis.com/" />
|
|
||||||
|
|
||||||
<!-- There is an interesting question about whether we should
|
|
||||||
append &strip=1 to all cache URLs. This causes them to load
|
|
||||||
without images and styles, which is more secure but can look
|
|
||||||
worse.
|
|
||||||
Without &strip=1, the images and styles from the cached
|
|
||||||
pages still load from the original, typically unencrypted, page.
|
|
||||||
With &strip=1, the cached page will be text-only and
|
|
||||||
will come exclusively from Google's HTTPS server.
|
|
||||||
-->
|
|
||||||
<rule from="^http://(?:www\.)?gstatic\.com/"
|
|
||||||
to="https://www.gstatic.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(csi|encrypted-tbn\d|g0|[\w-]+\.metric|ssl|t\d)\.gstatic\.com/"
|
|
||||||
to="https://$1.gstatic.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://api\.recaptcha\.net/"
|
|
||||||
to="https://www.google.com/recaptcha/api/" />
|
|
||||||
|
|
||||||
<rule from="^http://api-secure\.recaptcha\.net/"
|
|
||||||
to="https://api-secure.recaptcha.net/" />
|
|
||||||
|
|
||||||
<rule from="^http://gdata\.youtube\.com/"
|
|
||||||
to="https://gdata.youtube.com/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,6 +0,0 @@
|
||||||
<ruleset name="GoogleCanada">
|
|
||||||
<target host="google.ca" />
|
|
||||||
<target host="*.google.ca" />
|
|
||||||
<rule from="^http://([^/:@\.]+)\.google\.ca/finance" to="https://$1.google.ca/finance"/>
|
|
||||||
</ruleset>
|
|
||||||
|
|
|
@ -1,65 +0,0 @@
|
||||||
<!--
|
|
||||||
For other Google coverage, see GoogleServices.xml.
|
|
||||||
|
|
||||||
|
|
||||||
Problematic domains:
|
|
||||||
|
|
||||||
- www.google.bo *
|
|
||||||
- www.google.co *
|
|
||||||
- www.google.ec *
|
|
||||||
- www.google.in *
|
|
||||||
- www.google.kr *
|
|
||||||
- www.google.com.kz **
|
|
||||||
- www.google.com.lk *
|
|
||||||
- www.google.mx **
|
|
||||||
- www.google.sg *
|
|
||||||
- www.google.sl *
|
|
||||||
- www.google.ug *
|
|
||||||
- www.google.vn *
|
|
||||||
|
|
||||||
* 404; mismatched, CN: google.com
|
|
||||||
** Works; mismatched, CN: google.com
|
|
||||||
|
|
||||||
-->
|
|
||||||
<ruleset name="Google Images">
|
|
||||||
|
|
||||||
<target host="google.*" />
|
|
||||||
<target host="www.google.*" />
|
|
||||||
<target host="google.co.*" />
|
|
||||||
<target host="www.google.co.*" />
|
|
||||||
<target host="google.com" />
|
|
||||||
<target host="images.google.com" />
|
|
||||||
<target host="google.com.*" />
|
|
||||||
<target host="www.google.com.*" />
|
|
||||||
<!--
|
|
||||||
Only handle image-related paths in this ruleset:
|
|
||||||
-->
|
|
||||||
<exclusion pattern="^http://(?:www\.)?google(?:\.com?)?\.\w{2,3}/(?!(?:advanced_image_search|imghp|.*tb(?:m=isch|s=sbi)))" />
|
|
||||||
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/"
|
|
||||||
to="https://www.google.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://images\.google\.com/"
|
|
||||||
to="https://images.google.com/" />
|
|
||||||
|
|
||||||
<!-- First handle problematic domains:
|
|
||||||
-->
|
|
||||||
<rule from="^http://(?:www\.)?google\.co/"
|
|
||||||
to="https://www.google.com/" />
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.(?:co\.)?(in|kr|ug)/"
|
|
||||||
to="https://www.google.co.$1/" />
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.(?:com\.)?(kz|lk)/"
|
|
||||||
to="https://www.google.$1/" />
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.(?:com\.)?(bo|ec|mx|sg|sl|vn)/"
|
|
||||||
to="https://www.google.com.$1/" />
|
|
||||||
|
|
||||||
<!-- And then the rest:
|
|
||||||
-->
|
|
||||||
<rule from="^http://(?:www\.)?google\.(com?\.)?(ae|ar|at|au|bg|bh|br|ca|ch|cl|co|cr|cu|de|eg|es|fi|fr|gh|gt|hr|id|ie|il|it|jo|jp|jm|ke|kw|lb|ly|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|se|sv|th|tr|uk|uy|ve|za|zw)/"
|
|
||||||
to="https://www.google.$1$2/" />
|
|
||||||
|
|
||||||
</ruleset>
|
|
|
@ -1,78 +0,0 @@
|
||||||
<ruleset name="Search www.google.com">
|
|
||||||
|
|
||||||
<!--
|
|
||||||
Enabling this ruleset should cause searches to go to
|
|
||||||
https://www.google.com rather than https://encrypted.google.com. Note that
|
|
||||||
the filename is important; it must be before GoogleSearch.xml in a bash
|
|
||||||
expansion of src/chrome/content/rules/*.xml in order to take precedence.
|
|
||||||
-->
|
|
||||||
|
|
||||||
<target host="*.google.com" />
|
|
||||||
<target host="google.com" />
|
|
||||||
<target host="www.google.com.*" />
|
|
||||||
<target host="google.com.*" />
|
|
||||||
<target host="www.google.co.*" />
|
|
||||||
<target host="google.co.*" />
|
|
||||||
<target host="www.google.*" />
|
|
||||||
<target host="google.*" />
|
|
||||||
<!-- beyond clients1 these do not currently exist in the ccTLDs,
|
|
||||||
but just in case... -->
|
|
||||||
<target host="clients1.google.com.*" />
|
|
||||||
<target host="clients2.google.com.*" />
|
|
||||||
<target host="clients3.google.com.*" />
|
|
||||||
<target host="clients4.google.com.*" />
|
|
||||||
<target host="clients5.google.com.*" />
|
|
||||||
<target host="clients6.google.com.*" />
|
|
||||||
<target host="clients1.google.co.*" />
|
|
||||||
<target host="clients2.google.co.*" />
|
|
||||||
<target host="clients3.google.co.*" />
|
|
||||||
<target host="clients4.google.co.*" />
|
|
||||||
<target host="clients5.google.co.*" />
|
|
||||||
<target host="clients6.google.co.*" />
|
|
||||||
<target host="clients1.google.*" />
|
|
||||||
<target host="clients2.google.*" />
|
|
||||||
<target host="clients3.google.*" />
|
|
||||||
<target host="clients4.google.*" />
|
|
||||||
<target host="clients5.google.*" />
|
|
||||||
<target host="clients6.google.*" />
|
|
||||||
|
|
||||||
<rule from="^http://www\.google\.com/$"
|
|
||||||
to="https://www.google.com/"/>
|
|
||||||
|
|
||||||
<!-- The most basic case. -->
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/search"
|
|
||||||
to="https://www.google.com/search"/>
|
|
||||||
|
|
||||||
<!-- A very annoying exception that we seem to need for the basic case -->
|
|
||||||
|
|
||||||
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" />
|
|
||||||
<exclusion pattern="^http://clients[0-9]\.google\.com/.*client=products.*" />
|
|
||||||
<exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" />
|
|
||||||
|
|
||||||
<!-- https://trac.torproject.org/projects/tor/ticket/9713 -->
|
|
||||||
|
|
||||||
<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" />
|
|
||||||
|
|
||||||
<!-- This is necessary for image results links from web search results -->
|
|
||||||
|
|
||||||
<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" />
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/webhp"
|
|
||||||
to="https://www.google.com/webhp"/>
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/#"
|
|
||||||
to="https://www.google.com/#"/>
|
|
||||||
|
|
||||||
<rule from="^http://(?:www\.)?google\.com/$"
|
|
||||||
to="https://www.google.com/"/>
|
|
||||||
|
|
||||||
<!-- Completion urls look like this:
|
|
||||||
|
|
||||||
http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n
|
|
||||||
|
|
||||||
-->
|
|
||||||
<rule from="^http://clients[0-9]\.google\.com/complete/search"
|
|
||||||
to="https://clients1.google.com/complete/search"/>
|
|
||||||
|
|
||||||
</ruleset>
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue