#!/bin/sh -e

# Script preamble: enable errexit, locate the install directory, check that the
# vercmp helper is available and print usage when called without arguments.

# The shebang already asks for -e, but that flag is lost when the script is
# started as `sh <script>`; setting it here keeps both invocations identical.
set -e

# Directory containing this script (symlinks resolved); pkg/ and lib/ are
# looked up relative to it.
VERSIONSROOT="$(dirname "$(readlink -f "$0")")"

# `command -v` is the POSIX way to probe for a command; `which` is an optional
# external tool that is not installed everywhere.
command -v vercmp >/dev/null || { echo "vercmp not found!" >&2; exit 1; }

# No arguments at all: show usage on stdout and exit successfully.
[ -n "$1" ] \
|| { echo "usage: $(basename "$0") [-c|-r] <package> [timeout]"; exit 0; }

# Load the logging helpers; `err` used below is expected to come from here.
# NOTE(review): the checks below assume err prints its message and exits.
. "$VERSIONSROOT/lib/log.sh"

# Optional mode flags, recognised in this order only (-c first, then -r):
#   -c  print the most recently changed cache instead of fetching anything
#   -r  re-run the version regex even when a cache for the page already exists
[ "$1" != "-c" ] || { shift; cached="yes"; }
[ "$1" != "-r" ] || { shift; reparse="yes"; }

# A flag without a package would otherwise pass the directory test below
# (pkg/ itself exists) and end in a misleading "no 'url' file for" message.
[ -n "$1" ] || err "no package given"

[ -d "$VERSIONSROOT/pkg/$1" ] || err "unrecognized package: $1"
[ -f "$VERSIONSROOT/pkg/$1/url" ] || err "no 'url' file for $1"

name="$1"
# Passed to curl -m (maximum time per transfer, in seconds); defaults to 5.
timeout=${2:-5}

# Choose the format handed to `vercmp -f`: the package's own format when
# `vercmp formats` lists a line equal to the package name, else "default".
# -x -F compares whole lines literally, so regex metacharacters in a package
# name (such as the dot in "node.js") cannot match some other format.
if vercmp formats | grep -qxF -- "$name"; then
    fmt="$name"
else
    fmt="default"
fi

# Remove the scratch file if one was created.
# Always returns 0, including when the file is already gone or $temp is
# empty, so running it from the EXIT trap cannot change the script's exit
# status under errexit.
cleanup() {
    if [ -n "${temp:-}" ]; then
        rm -f -- "$temp"
    fi
}
trap cleanup EXIT
# Some sh implementations skip the EXIT trap when killed by a signal; turning
# the usual fatal signals into a plain exit makes sure cleanup still runs.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Scratch file holding the extracted part of the downloaded page.
temp="$(mktemp)" || { echo "mktemp failed" >&2; exit 1; }

# Walk the package's 'url' file and print the version list from the first
# source that can be downloaded. Each line holds four tab-separated fields:
#   1: url - page to download
#   2: pre - awk regex; printing starts at a line containing it (the text up
#            to and including the match is dropped)
#   3: pst - awk regex; a line matching it is not printed and stops printing
#   4: rex - perl regex; capture group 1 of every matching line is a version
#
# The extracted text is hashed and the sorted result is cached as
# pkg/<name>/<md5>.cache, so an unchanged page is not parsed again.
#
# `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
# read's default trimming of leading/trailing blanks is kept on purpose.
while read -r line || [ -n "$line" ]; do
    if [ "$cached" ]; then
        # -c: print the most recently changed cache and stop; no network use.
        cache="$(ls -ct -- "$VERSIONSROOT/pkg/$name"/*.cache | head -n1)"
        [ -f "$cache" ] || { echo "[ERR] no cache for $name!" >&2; exit 1; }

        cat "$cache"
        exit 0
    fi

    # printf instead of echo: some sh implementations expand backslash
    # sequences in echo arguments, which would corrupt the regex fields.
    url="$(printf '%s\n' "$line" | cut -f1)"
    pre="$(printf '%s\n' "$line" | cut -f2)"
    pst="$(printf '%s\n' "$line" | cut -f3)"
    rex="$(printf '%s\n' "$line" | cut -f4)"

    # Download first, outside of any pipeline: a `continue` placed inside a
    # pipeline stage runs in a subshell and never reaches this loop, so a
    # failed download used to be parsed and cached as an empty page.
    if ! page="$(curl --compressed -m "$timeout" -L -s "$url")"; then
        echo "url failed" >&2
        continue
    fi

    # extract the text between the pre and post regexes on the target webpage.
    # The patterns are spliced into the awk program as regex literals, which
    # is the syntax the url files are written for.
    printf '%s\n' "$page" \
    | awk "sub(/^.*$pre/,\"\"){echo=1} /$pst/{echo=0} echo" \
    > "$temp"

    cache="$VERSIONSROOT/pkg/$name/$(md5sum "$temp" | awk '{print $1}').cache"

    if [ ! -f "$cache" ] || [ "$reparse" ]; then
        unsorted="$cache.unsorted"
        sorted="$cache.sorted"

        # Build the new list next to the cache and swap it in only once both
        # steps succeeded, so a failure never leaves a truncated cache behind
        # and the previous cache stays available for -c.
        if perl -n -e "/$rex/ && print \"\$1\\n\"" "$temp" > "$unsorted" \
        && shsort -r "vercmp -f $fmt" < "$unsorted" > "$sorted"; then
            # The glob is expanded by the shell into separate arguments, so
            # any number of stale caches is removed; -f ignores "no match".
            rm -f -- "$VERSIONSROOT/pkg/$name"/*.cache "$unsorted"
            mv -- "$sorted" "$cache"
        else
            rm -f -- "$unsorted" "$sorted"
            echo "[ERR] unable to parse versions for $name!" >&2
            exit 1
        fi
    fi

    cat "$cache"
    exit 0

done < "$VERSIONSROOT/pkg/$name/url"

# Reached only when the url file is empty or every download failed.
err "unable to fetch versions for $1"