diff --git a/docs/aegisub_convert_docs.pl b/docs/aegisub_convert_docs.pl index 81ee766cd..d4633d5aa 100644 --- a/docs/aegisub_convert_docs.pl +++ b/docs/aegisub_convert_docs.pl @@ -149,11 +149,11 @@ exit(0); ########################################## + +# parse out pages this page links to +# modify it +# return filename of the page, modified html and list of links sub parse_and_fix_html { - # parse out pages this page links to - # modify it - # return filename of the page, modified html and list of links - #get arguments my ($url, $base, $content) = @_; my (@links, @links_to_modify); # list of links to return later @@ -172,16 +172,16 @@ sub parse_and_fix_html { foreach my $tag ( $html->links() ) { my ($tagname, %attrs) = @{$tag}; - my $quoted = quotemeta($host); + my $quoted = quotemeta($docs_base_url); # does the link interest us? if ( ($tagname eq 'a') and exists($attrs{'href'}) ) { my $href = quotemeta($attrs{'href'}); # quotemeta? - $href =~ s!\&!\&\;!g; # quotemeta kills & things + $href =~ s!\&!\&\;!g; # quotemeta kills & things for some reason # skip and kill special or "edit" links if ( $attrs{'href'} =~ m!docs/index\.php\?!i ) { - $content =~ s!(.*?)!$3!gi; + $content =~ s!(.*?)!$1!gi; next(LINKLIST); } # skip and kill image/special links @@ -189,8 +189,12 @@ sub parse_and_fix_html { $content =~ s!(.*?)!$1!gi; next(LINKLIST); } - # don't process #anchor links - if ( $attrs{'href'} =~ m!\#(.*?)$! ) { + # change somepage#anchor links so they point to the right document, + # but don't return them for further processing + # BUG: if a page is ONLY referred to by this type of link, it won't get downloaded! + # (highly unlikely) + if ( $attrs{'href'} =~ m!.+\#(.*?)$! ) { + push(@links_to_modify, $attrs{'href'}); next(LINKLIST); } @@ -199,7 +203,7 @@ sub parse_and_fix_html { push(@links_to_modify, $attrs{'href'}); } # is not relative and goes somewhere else than aegisub.cellosoft.com - # so we're not interested in it + # so we're not interested in it (#anchor links are not touched either) else { next(LINKLIST); } push(@links, URI->new_abs($attrs{'href'}, $base)); @@ -240,8 +244,13 @@ sub parse_and_fix_html { # kill the topbar - $content =~ s!\
!!s; + $content =~ s!
!!s; + # kill the article/discussion/history thing + $content =~ s!
!!s; + + # kill the "toolbox" at the bottom left + $content =~ s!
new_abs($1, $base)); + while ( $content =~ m!url\((\")?(.+?)(\")?\)!mgi ) { + my $text = $2; + + # skip it if it's nonrelative and goes somewhere else than aegisub.cellosoft.com + if ( $text =~ m!^http!i ) { + # actually fuck this there shouldn't be any nonrelative links in there anyway + next(LINKLIST) + #unless ( $text =~ m!^${quoted}!i ); + } + + push(@links, URI->new_abs($text, $base)); } my $filename = convert_link($url);