[docmaker] Recognise URLs.
* src/tools/docmaker/tohtml.py (re_url): New regular expression. (make_html_para): Use it.
This commit is contained in:
parent
3da5182804
commit
c7cc9ebe20
|
@ -1,3 +1,10 @@
|
||||||
|
2013-06-25 Werner Lemberg <wl@gnu.org>
|
||||||
|
|
||||||
|
[docmaker] Recognise URLs.
|
||||||
|
|
||||||
|
* src/tools/docmaker/tohtml.py (re_url): New regular expression.
|
||||||
|
(make_html_para): Use it.
|
||||||
|
|
||||||
2013-06-19 Werner Lemberg <wl@gnu.org>
|
2013-06-19 Werner Lemberg <wl@gnu.org>
|
||||||
|
|
||||||
* Version 2.5.0.1 released.
|
* Version 2.5.0.1 released.
|
||||||
|
|
|
@ -1,11 +1,46 @@
|
||||||
# ToHTML (c) 2002, 2003, 2005, 2006, 2007, 2008
|
# ToHTML (c) 2002, 2003, 2005-2008, 2013
|
||||||
# David Turner <david@freetype.org>
|
# David Turner <david@freetype.org>
|
||||||
|
|
||||||
from sources import *
|
from sources import *
|
||||||
from content import *
|
from content import *
|
||||||
from formatter import *
|
from formatter import *
|
||||||
|
|
||||||
import time
|
import time, re
|
||||||
|
|
||||||
|
|
||||||
|
# this regular expression code to identify a URL has been taken from
|
||||||
|
#
|
||||||
|
# http://mail.python.org/pipermail/tutor/2002-September/017228.html
|
||||||
|
#
|
||||||
|
# (with slight modifications)
|
||||||
|
|
||||||
|
urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
|
||||||
|
ltrs = r'\w'
|
||||||
|
gunk = r'/#~:.?+=&%@!\-'
|
||||||
|
punc = r'.:?\-'
|
||||||
|
any = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
|
||||||
|
'gunk' : gunk,
|
||||||
|
'punc' : punc }
|
||||||
|
url = r"""
|
||||||
|
(
|
||||||
|
\b # start at word boundary
|
||||||
|
%(urls)s : # need resource and a colon
|
||||||
|
[%(any)s] +? # followed by one or more of any valid
|
||||||
|
# character, but be conservative and
|
||||||
|
# take only what you need to...
|
||||||
|
(?= # [look-ahead non-consumptive assertion]
|
||||||
|
[%(punc)s]* # either 0 or more punctuation
|
||||||
|
(?: # [non-grouping parentheses]
|
||||||
|
[^%(any)s] | $ # followed by a non-url char
|
||||||
|
# or end of the string
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
""" % {'urls' : urls,
|
||||||
|
'any' : any,
|
||||||
|
'punc' : punc }
|
||||||
|
|
||||||
|
re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
|
||||||
|
|
||||||
|
|
||||||
# The following defines the HTML header used by all generated pages.
|
# The following defines the HTML header used by all generated pages.
|
||||||
|
@ -291,6 +326,8 @@ class HtmlFormatter( Formatter ):
|
||||||
line = self.make_html_word( words[0] )
|
line = self.make_html_word( words[0] )
|
||||||
for word in words[1:]:
|
for word in words[1:]:
|
||||||
line = line + " " + self.make_html_word( word )
|
line = line + " " + self.make_html_word( word )
|
||||||
|
# handle hyperlinks
|
||||||
|
line = re_url.sub( r'<a href="\1">\1</a>', line )
|
||||||
# convert `...' quotations into real left and right single quotes
|
# convert `...' quotations into real left and right single quotes
|
||||||
line = re.sub( r"(^|\W)`(.*?)'(\W|$)", \
|
line = re.sub( r"(^|\W)`(.*?)'(\W|$)", \
|
||||||
r'\1‘\2’\3', \
|
r'\1‘\2’\3', \
|
||||||
|
|
Loading…
Reference in New Issue