#!/bin/bash
# rpi/pihole/blocklists/build/updateWhitelist.sh
#
# Fetches several public whitelists, normalises them into per-source
# white.list.* files and derives AdBlock-style exclusion lists from them.

# Work from the directory that holds this script so the relative helper
# call (./buildList.sh) resolves no matter where the script was started.
SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
cd "$SCRIPTPATH" || exit
# Test internet connection: give up only when BOTH probe hosts are unreachable.
if ! ping -c1 -w2 google.de >/dev/null && ! ping -c1 -w2 cloudflare.com >/dev/null; then
  echo "No Internet connection! The script will be terminated!"
  exit 1
fi
# Helper functions
# generateWhitelist RAW OUT
# Normalises the downloaded file RAW into the whitelist OUT:
#   - line endings are converted with dos2unix,
#   - leading whitespace is dropped and only the first field of a line is kept,
#   - blank entries and entries starting with '#' or '!' are discarded,
#   - the result is handed to sortList.
# OUT is left untouched when RAW is empty. RAW is always removed afterwards.
generateWhitelist() {
  local raw=$1 out=$2

  dos2unix -q "$raw"
  # Only build the list when the download actually contains lines.
  if [ -s "$raw" ] && [ "$(sed -n '$=' "$raw")" != "0" ]; then
    sed -e 's/^[[:space:]]*//' <"$raw" |
      awk '{print $1}' |
      grep -Ev '^\s*$|^#|^!' >"$out"
    sortList "$out"
  fi
  rm -f "$raw"
}
# sortList FILE
# Sorts FILE in place, case-insensitively, dropping duplicate lines
# (sort -uf). Empty or missing files are left alone.
# Returns 0 on success or when there is nothing to do, non-zero when
# sorting or replacing the file failed.
sortList() {
  local list=$1

  [ -s "$list" ] || return 0
  # Replace the list only if sort succeeded; otherwise a failed sort
  # (e.g. full disk) would overwrite the list with a truncated temp file.
  if sort -uf "$list" >"$list.sorted"; then
    mv "$list.sorted" "$list"
  else
    rm -f "$list.sorted"
    return 1
  fi
}
# convertWhiteToAdBlock SRC DST [INCLUDESUBDOMAINS]
# Rewrites the whitelist SRC as AdBlock-style exception rules in DST.
# DST is removed first and rebuilt line by line:
#   empty line        -> empty line
#   "#text"           -> "!text"        (comment marker swapped)
#   "*.example.com"   -> "@@||example.com^"
#   "example.com"     -> "@@|example.com^"  when INCLUDESUBDOMAINS is "0"
#                        "@@||example.com^" otherwise
# INCLUDESUBDOMAINS defaults to "0" when omitted or empty.
convertWhiteToAdBlock() {
  local src=$1 dst=$2
  local includesubdomains=${3:-0}
  local domain

  rm -f "$dst"
  # "|| [ -n ... ]" keeps a final line that has no trailing newline.
  while IFS= read -r domain || [ -n "$domain" ]; do
    case "$domain" in
    '')
      printf '\n' >>"$dst"
      ;;
    '#'*)
      printf '!%s\n' "${domain:1}" >>"$dst"
      ;;
    '*.'*)
      printf '@@||%s^\n' "${domain:2}" >>"$dst"
      ;;
    *)
      if [ "$includesubdomains" == "0" ]; then
        printf '@@|%s^\n' "$domain" >>"$dst"
      else
        printf '@@||%s^\n' "$domain" >>"$dst"
      fi
      ;;
    esac
  done <"$src"
}
# Start: announce the whitelist stage (three banner lines plus a blank line)
printf '%s\n' \
  '===========================================' \
  'Whitelist ...' \
  '===========================================' \
  ''
# Config
# data: directory holding the generated lists; tmp: scratch file inside it.
data='/media/nas/git/rpi/pihole/blocklists/data'
tmp="${data}/tmp"
# PCRE (uses a lookahead) accepting a whole line only if it is a host name:
# 4-253 characters overall, dot-separated labels of at most 63 characters
# that start and end alphanumeric (hyphens allowed inside), and a final
# label that is either 2+ letters or an "xn--" encoded name.
regex='(?=^.{4,253}$)(^(?:[a-zA-Z0-9](?:(?:[a-zA-Z0-9\-]){0,61}[a-zA-Z0-9])?\.)+([a-zA-Z]{2,}|xn--[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])$)'
# Referral
# Affiliate/tracking domain list: keep the first field of every line, skip
# blank lines, '#'/'!' comments and names starting with "www", and prefix
# each remaining entry with "*." before it is normalised.
curl -s -L https://raw.githubusercontent.com/nextdns/metadata/master/privacy/affiliate-tracking-domains |
  awk '{print $1}' | grep -Ev '^\s*$|^#|^!|^www' | sed -e 's/^/\*\./' >"$tmp"
generateWhitelist "$tmp" "$tmp.1"
# generateWhitelist writes nothing for an empty download, so merge the new
# entries into the existing referral list only when there are some.
if [ -s "$tmp.1" ]; then
  cat "$tmp.1" >>"$data/white.list.referral"
fi
rm -f "$tmp.1"
sortList "$data/white.list.referral"
wc -l "$data/white.list.referral"
# Each section below downloads one source into $tmp, lets generateWhitelist
# turn it into $data/white.list.<source>, and prints the resulting line count.

# KEES (only lines that are valid host names per $regex)
curl -s -L https://raw.githubusercontent.com/Kees1958/W3C_annual_most_used_survey_blocklist/master/RemovedDomains |
  grep -P "$regex" >"$tmp"
generateWhitelist "$tmp" "$data/white.list.kees"
wc -l "$data/white.list.kees"
# NT
curl -s -L https://raw.githubusercontent.com/notracking/hosts-blocklists-scripts/master/hostnames.whitelist.txt >"$tmp"
generateWhitelist "$tmp" "$data/white.list.nt"
wc -l "$data/white.list.nt"
# EGP
curl -s -L https://raw.githubusercontent.com/EnergizedProtection/unblock/master/basic/formats/domains.txt >"$tmp"
generateWhitelist "$tmp" "$data/white.list.egp"
wc -l "$data/white.list.egp"
# BMJ (three files concatenated into one list)
{
  curl -s -L https://raw.githubusercontent.com/badmojr/1Hosts/master/submit_here/exclude_for_all.txt
  curl -s -L https://raw.githubusercontent.com/badmojr/1Hosts/master/submit_here/exclude_for_mini_Lite_only.txt
  curl -s -L https://raw.githubusercontent.com/badmojr/1Hosts/master/submit_here/include_for_Pro_Xtra_only.txt
} >"$tmp"
generateWhitelist "$tmp" "$data/white.list.bmj"
wc -l "$data/white.list.bmj"
# SW (two files concatenated into one list)
{
  curl -s -L https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Whitelists/Filter
  curl -s -L https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Whitelists/Whitelist
} >"$tmp"
generateWhitelist "$tmp" "$data/white.list.sw"
wc -l "$data/white.list.sw"
# SHC (lines starting with "#0": take the second field, keep valid host names)
curl -s -L https://someonewhocares.org/hosts/zero/ |
  grep -P '^#0' | awk '{print $2}' | grep -P "$regex" >"$tmp"
generateWhitelist "$tmp" "$data/white.list.shc"
wc -l "$data/white.list.shc"
# OISD (text dump of the page; keep lines starting with "[", take the text
# after the first "]", keep valid host names)
lynx -dump https://oisd.nl/excludes.php |
  sed -e 's/^[[:space:]]*//' | grep -P '^\[' | awk -F']' '{print $2}' | grep -P "$regex" >"$tmp"
generateWhitelist "$tmp" "$data/white.list.oisd"
wc -l "$data/white.list.oisd"
# AdGuard
# Collects whitelist entries from four rule files into $tmp. Every file is
# downloaded once into $tmp.src and filtered twice, so both passes see the
# same snapshot and each URL is requested only once.
#   exclusions.txt : pass 1 keeps every non-comment line with '|' and '^'
#                    and a "$important" suffix removed; pass 2 adds a "*."
#                    entry for lines that are already bare host names.
#   other files    : pass 1 keeps every "@@" exception rule with rule
#                    punctuation and "$important" removed; pass 2 adds a
#                    "*." entry for rules that start with "@@||".
# Only results that are valid host names per $regex are kept.
: >"$tmp"
for url in \
  https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt \
  https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt \
  https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt \
  https://raw.githubusercontent.com/DandelionSprout/adfilt/master/AdGuard%20Home%20Compilation%20List/AdGuardHomeCompilationList.txt; do
  curl -s -L "$url" >"$tmp.src"
  case "$url" in
  */exclusions.txt)
    grep -Ev '^\s*$|^#|^!' "$tmp.src" |
      sed -e 's/[\|^]//g' -e 's/$important//' |
      grep -P "$regex" >>"$tmp"
    grep -Ev '^\s*$|^#|^!|^\||^\/' "$tmp.src" |
      grep -P "$regex" | sed -e 's/^/*./' >>"$tmp"
    ;;
  *)
    grep -P '^\@\@' "$tmp.src" |
      sed -e 's/[\|^\@]//g' -e 's/$important//' |
      grep -P "$regex" >>"$tmp"
    grep -P '^\@\@\|\|' "$tmp.src" |
      sed -e 's/[\|^\@]//g' -e 's/$important//' |
      grep -P "$regex" | sed -e 's/^/*./' >>"$tmp"
    ;;
  esac
done
rm -f "$tmp.src"
generateWhitelist "$tmp" "$data/white.list.adguard"
wc -l "$data/white.list.adguard"
# hl2guide
# Keeps the "@@" exception rules and strips rule punctuation ('|', '^', '@',
# backslash) and a "$important" suffix.
# NOTE(review): unlike the AdGuard section, the result is not validated
# against $regex here - confirm whether that is intentional.
curl -s -L https://raw.githubusercontent.com/hl2guide/AdGuard-Home-Whitelist/main/whitelist.txt |
  grep -P '^\@\@' | sed -e 's/[\|^\@]//g' -e 's/$important//' >"$tmp"
generateWhitelist "$tmp" "$data/white.list.hl2guide"
wc -l "$data/white.list.hl2guide"
# Build exclude list
./buildList.sh exclude
# Push personal whitelist to local repositories
cp "$data/white.list" /media/nas/git/hosts
cp "$data/white.list.referral" /media/nas/git/hosts
# Convert personal whitelist to AdBlock format.
# The referral list is converted with the third argument set to 1, so plain
# entries become "@@||domain^" rules, and the result is sorted afterwards.
convertWhiteToAdBlock "$data/white.list" /media/nas/git/adguard/whitelist.adguard
convertWhiteToAdBlock "$data/white.list.referral" /media/nas/git/adguard/whitelist.referral.adguard 1
sortList /media/nas/git/adguard/whitelist.referral.adguard
# Build host-compiler exclusion lists
# For each personal list two kinds of lines are written to
# $data/adblock.exclusions.<name>:
#   1. every entry (a "*." removed) as "|domain^", sorted and de-duplicated;
#   2. for every "*." entry additionally a regex rule of the form
#      /(\||\.|^)example\.com($|\^)/, sorted and de-duplicated.
# Source files: white.list -> adblock.exclusions.list,
#               white.list.important -> adblock.exclusions.important,
#               white.list.referral  -> adblock.exclusions.referral.
for excl_name in list important referral; do
  if [ "$excl_name" = "list" ]; then
    excl_src="$data/white.list"
  else
    excl_src="$data/white.list.$excl_name"
  fi
  excl_dst="$data/adblock.exclusions.$excl_name"

  grep -Ev '^\s*$|^#|^!' <"$excl_src" |
    sed 's/\*\.//' | sort -u |
    sed -e 's/^/|/' -e 's/$/^/' >"$excl_dst"

  # The sed steps run in order: drop the leading "*", drop the following
  # ".", escape the remaining dots, then wrap the name in the regex rule.
  grep -Ev '^\s*$|^#|^!' <"$excl_src" | grep -E '^\*\.' |
    sed -e 's/^\*//' -e 's/^.//' -e 's/\./\\./g' \
      -e 's/^/\(\\|\|\\.\|\^\)/' \
      -e 's/^/\//' -e 's/$/\(\$\|\\\^\)\//' |
    sort -u >>"$excl_dst"
done
# Announce the dead-list stage
printf '%s\n' \
  '===========================================' \
  'Deadlist ...' \
  '==========================================='
# Build dead list
./buildList.sh dead
# Go back to the script directory before the script ends (or exit if that fails).
cd "$SCRIPTPATH" || exit