#!/bin/sh
#
# Download every *.patch file listed for one directory of the Alpine Linux
# aports tree (GitHub mirror) into the current working directory.
#
# usage: <script> [-c] [-r] name [timeout]
#   -c, -r    accepted for compatibility; they only set the variables
#             `cached` / `reparse`, which nothing in this script reads.
#   name      path below the aports repository root; it is appended to the
#             tree URL and to the raw-download URL.
#   timeout   maximum time in seconds for fetching the listing page
#             (passed to `curl -m`); defaults to 5.
#
# Exit status: 0 on success or when only the usage text is shown,
# non-zero when the listing page or any patch download fails.

# Options live here rather than on the shebang line so they also apply when
# the script is started as `sh script`.
set -eu

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Usage goes to stdout with status 0, as existing callers may rely on that.
usage() {
  printf 'usage: %s [-c] [-r] name [timeout]\n' "${0##*/}"
  exit 0
}

# Leading flags may appear in any order.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -c) cached="yes" ;;   # currently unused below
    -r) reparse="yes" ;;  # currently unused below
    *) break ;;
  esac
  shift
done

# The name is validated after the flags are consumed, so `script -c` alone
# no longer proceeds with an empty name.
[ -n "${1:-}" ] || usage

name="$1"
timeout="${2:-5}"

# curl -m only understands a decimal number; reject anything else early.
case "$timeout" in
  '' | *[!0-9.]*) die "invalid timeout: $timeout" ;;
esac

# Create the temp file first, then install the trap, so cleanup never sees
# an unset path.
temp="$(mktemp)" || die "mktemp failed"
cleanup() { rm -f -- "$temp"; }
trap cleanup EXIT

# main

#url="https://git.alpinelinux.org/aports/tree/$name?h=master"
#pre="tree listing"
#pst="<\/table"
#rex="href='.+\/([^\/]+.patch)'"
#plain="https://git.alpinelinux.org/aports/plain"

# github mirror
# NOTE(review): pre/pst/rex depend on the HTML markup GitHub serves for a
# tree page -- confirm they still match the current page layout.
url="https://github.com/alpinelinux/aports/tree/master/$name"
pre="repository-content"
pst="footer container"
rex='title="([^\/]+\.patch)"'   # dot escaped so only a literal ".patch" matches
plain="https://raw.githubusercontent.com/alpinelinux/aports/master"

# Fetch the listing page into the temp file. The status is checked directly
# (not hidden behind a pipeline) and -f turns HTTP errors into failures.
if ! curl --compressed -f -m "$timeout" -L -s -o "$temp" "$url"; then
  die "url failed"
fi

# 1. awk keeps the text between the pre and post regexes: on the line where
#    `pre` matches, everything up to and including the match is dropped and
#    output is switched on; a line matching `pst` switches it off.
# 2. perl prints the first capture of `rex` for each matching line.
# 3. each captured file name is downloaded from the raw mirror; under
#    `set -e` the first failing download aborts the loop and the script.
# Patterns are handed over via -v / the environment instead of being pasted
# into the program text.
awk -v pre="$pre" -v pst="$pst" '
  sub("^.*" pre, "") { emit = 1 }
  $0 ~ pst           { emit = 0 }
  emit
' "$temp" \
  | REX="$rex" perl -n -e 'print "$1\n" if /$ENV{REX}/' \
  | while IFS= read -r path; do
      curl -f -L -O "$plain/$name/$path"
    done