[docmaker] Recognise URLs.
* src/tools/docmaker/tohtml.py (re_url): New regular expression. (make_html_para): Use it.
This commit is contained in:
parent
3da5182804
commit
c7cc9ebe20
|
@ -1,3 +1,10 @@
|
|||
2013-06-25 Werner Lemberg <wl@gnu.org>
|
||||
|
||||
[docmaker] Recognise URLs.
|
||||
|
||||
* src/tools/docmaker/tohtml.py (re_url): New regular expression.
|
||||
(make_html_para): Use it.
|
||||
|
||||
2013-06-19 Werner Lemberg <wl@gnu.org>
|
||||
|
||||
* Version 2.5.0.1 released.
|
||||
|
|
|
@ -1,11 +1,46 @@
|
|||
# ToHTML (c) 2002, 2003, 2005, 2006, 2007, 2008
|
||||
# ToHTML (c) 2002, 2003, 2005-2008, 2013
|
||||
# David Turner <david@freetype.org>
|
||||
|
||||
from sources import *
|
||||
from content import *
|
||||
from formatter import *
|
||||
|
||||
import time
|
||||
import time, re
|
||||
|
||||
|
||||
# This regular-expression code for identifying a URL has been taken from
|
||||
#
|
||||
# http://mail.python.org/pipermail/tutor/2002-September/017228.html
|
||||
#
|
||||
# (with slight modifications)
|
||||
|
||||
# Building blocks of the URL pattern.
#
#   urls -- the schemes that may introduce a URL
#   ltrs -- word characters
#   gunk -- further characters allowed inside a URL
#   punc -- punctuation that may directly follow a URL without being
#           taken as part of it (see the look-ahead in the template)
urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
ltrs = r'\w'
gunk = r'/#~:.?+=&%@!\-'
punc = r'.:?\-'

# Body of the character class of everything allowed after the scheme:
# `ltrs', then `gunk', then `punc'.
any = ltrs + gunk + punc

# Template in verbose syntax; the `%(...)s' placeholders are replaced
# with the pieces defined above.  The single capturing group is the
# complete URL.
url = r"""
(
\b # start at word boundary
%(urls)s : # need resource and a colon
[%(any)s] +? # followed by one or more of any valid
# character, but be conservative and
# take only what you need to...
(?= # [look-ahead non-consumptive assertion]
[%(punc)s]* # either 0 or more punctuation
(?: # [non-grouping parentheses]
[^%(any)s] | $ # followed by a non-url char
# or end of the string
)
)
)
""" % dict( urls = urls, any = any, punc = punc )

# Compiled matcher; group 1 holds the matched URL.
re_url = re.compile( url, re.MULTILINE | re.VERBOSE )
|
||||
|
||||
|
||||
# The following defines the HTML header used by all generated pages.
|
||||
|
@ -291,6 +326,8 @@ class HtmlFormatter( Formatter ):
|
|||
line = self.make_html_word( words[0] )
|
||||
for word in words[1:]:
|
||||
line = line + " " + self.make_html_word( word )
|
||||
# handle hyperlinks
|
||||
line = re_url.sub( r'<a href="\1">\1</a>', line )
|
||||
# convert `...' quotations into real left and right single quotes
|
||||
line = re.sub( r"(^|\W)`(.*?)'(\W|$)", \
|
||||
r'\1‘\2’\3', \
|
||||
|
|
Loading…
Reference in New Issue