rpi/pihole/blocklists/build/updateWhitelist.sh

143 lines
4.9 KiB
Bash
Raw Normal View History

2021-10-19 21:54:26 +02:00
#!/bin/bash
SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
cd "$SCRIPTPATH" || exit
# Test internet connection
if ! (ping -c1 -w2 google.de >/dev/null) && ! (ping -c1 -w2 cloudflare.com >/dev/null); then
echo "No Internet connection! The script will be terminated!"
exit 1
fi
# Helpfunctions
generateWhitelist() {
dos2unix -q "$1"
if [ -s "$1" ]; then
if [ "$(sed $= -n "$1")" != "0" ]; then
cat <"$1" | sed -e 's/^[[:space:]]*//' | awk '{print $1}' | grep -Ev '^\s*$|^#|^!' >"$2"
sortList "$2"
fi
fi
rm -f "$1"
}
sortList() {
if [ -s "$1" ]; then
sort -uf "$1" >"$1.sorted"
mv "$1.sorted" "$1"
fi
}
convertWhiteToAdBlock() {
rm -f "$2"
while IFS= read -r domain || [ -n "$domain" ]; do
if [ "${domain:0:1}" == "#" ] || [ "${domain:0:1}" == "" ]; then
echo "$domain" | sed 's/^\#/\!/' >>"$2"
continue
fi
if [ "${domain:0:2}" == "*." ]; then
echo "@@||$(echo "$domain" | sed 's/^\*\.//')^" >>"$2"
else
if [[ $domain =~ \* ]]; then
echo "@@/$domain/" >>"$2"
else
echo "@@|$domain^" >>"$2"
fi
fi
done <"$1"
}
# Start
echo '==========================================='
echo 'Whitelist ...'
echo '==========================================='
echo ""
# Config
regex='(?=^.{4,253}$)(^(?:[a-zA-Z0-9](?:(?:[a-zA-Z0-9\-]){0,61}[a-zA-Z0-9])?\.)+([a-zA-Z]{2,}|xn--[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])$)'
data=/media/nas/git/rpi/pihole/blocklists/data
tmp=$data/tmp
# notracking
curl -s -L https://raw.githubusercontent.com/notracking/hosts-blocklists-scripts/master/hostnames.whitelist.txt >$tmp
generateWhitelist $tmp $data/white.list.notracking
wc -l $data/white.list.notracking
# Energized
curl -s -L https://raw.githubusercontent.com/EnergizedProtection/unblock/master/basic/formats/domains.txt >$tmp
generateWhitelist $tmp $data/white.list.energized
wc -l $data/white.list.energized
# Referral
curl -s -L https://raw.githubusercontent.com/nextdns/metadata/master/privacy/affiliate-tracking-domains >$tmp
curl -s -L https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/referral-sites.txt >>$tmp
generateWhitelist $tmp $tmp.1
cat <$tmp.1 >>$data/white.list.referral
rm -f $tmp.1
sortList $data/white.list.referral
wc -l $data/white.list.referral
# SHC
curl -s -L https://someonewhocares.org/hosts/zero/ | grep -P '^#0' | awk '{print $2}' | grep -P "$regex" >$tmp
generateWhitelist $tmp $data/white.list.shc
wc -l $data/white.list.shc
# OISD
lynx -dump https://oisd.nl/excludes.php | sed -e 's/^[[:space:]]*//' | grep -P '^\[' | awk -F']' '{print $2}' | grep -P "$regex" >$tmp
generateWhitelist $tmp $data/white.list.oisd
wc -l $data/white.list.oisd
# AdGuard
curl -s -L https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt |
grep -P '^\@\@' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" >$tmp
curl -s -L https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt |
grep -P '^\@\@\|\|' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" | sed -e 's/^/*./' >>$tmp
curl -s -L https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt |
grep -Ev '^\s*$|^#|^!' | sed 's/[\|^]//g' | sed 's/$important//' |
grep -P "$regex" >>$tmp
curl -s -L https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt |
grep -P '^\@\@' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" >>$tmp
curl -s -L https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt |
grep -P '^\@\@\|\|' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" | sed -e 's/^/*./' >>$tmp
curl -s -L https://raw.githubusercontent.com/DandelionSprout/adfilt/master/AdGuard%20Home%20Compilation%20List/AdGuardHomeCompilationList.txt |
grep -P '^\@\@' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" >>$tmp
curl -s -L https://raw.githubusercontent.com/DandelionSprout/adfilt/master/AdGuard%20Home%20Compilation%20List/AdGuardHomeCompilationList.txt |
grep -P '^\@\@\|\|' | sed 's/[\|^\@]//g' | sed 's/$important//' |
grep -P "$regex" | sed -e 's/^/*./' >>$tmp
generateWhitelist $tmp $data/white.list.adguard
wc -l $data/white.list.adguard
# Build exclude list
./buildList.sh exclude
# Push personal whitelist to local repositories
cp $data/white.list /media/nas/git/hosts
cp $data/white.list.referral /media/nas/git/hosts
# Convert personal whitelist to AdBlock format
convertWhiteToAdBlock "$data/white.list" /media/nas/git/adguard/whitelist.adguard
convertWhiteToAdBlock "$data/white.list.referral" /media/nas/git/adguard/whitelist.referral.adguard
echo '==========================================='
echo 'Deadlist ...'
echo '==========================================='
# Build dead list
./buildList.sh dead
cd "$SCRIPTPATH" || exit