359 lines
10 KiB
Python
359 lines
10 KiB
Python
# Sources (c) 2002, 2003, 2004, 2006, 2007
|
|
# David Turner <david@freetype.org>
|
|
#
|
|
#
|
|
# this file contains definitions of classes needed to decompose
|
|
# C sources files into a series of multi-line "blocks". There are
|
|
# two kinds of blocks:
|
|
#
|
|
# - normal blocks, which contain source code or ordinary comments
|
|
#
|
|
# - documentation blocks, which have restricted formatting, and
|
|
# whose text always start with a documentation markup tag like
|
|
# "<Function>", "<Type>", etc..
|
|
#
|
|
# the routines used to process the content of documentation blocks
|
|
# are not contained here, but in "content.py"
|
|
#
|
|
# the classes and methods found here only deal with text parsing
|
|
# and basic documentation block extraction
|
|
#
|
|
import fileinput, re, sys, os, string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
################################################################
|
|
##
|
|
## BLOCK FORMAT PATTERN
|
|
##
|
|
## A simple class containing compiled regular expressions used
|
|
## to detect potential documentation format block comments within
|
|
## C source code
|
|
##
|
|
## note that the 'column' pattern must contain a group that will
|
|
## be used to "unbox" the content of documentation comment blocks
|
|
##
|
|
class SourceBlockFormat:
|
|
|
|
def __init__( self, id, start, column, end ):
|
|
"""create a block pattern, used to recognize special documentation blocks"""
|
|
|
|
self.id = id
|
|
self.start = re.compile( start, re.VERBOSE )
|
|
self.column = re.compile( column, re.VERBOSE )
|
|
self.end = re.compile( end, re.VERBOSE )
|
|
|
|
|
|
|
|
#
|
|
# format 1 documentation comment blocks look like the following:
|
|
#
|
|
# /************************************/
|
|
# /* */
|
|
# /* */
|
|
# /* */
|
|
# /************************************/
|
|
#
|
|
# we define a few regular expressions here to detect them
|
|
#
|
|
|
|
start = r'''
|
|
\s* # any number of whitespace
|
|
/\*{2,}/ # followed by '/' and at least two asterisks then '/'
|
|
\s*$ # eventually followed by whitespace
|
|
'''
|
|
|
|
column = r'''
|
|
\s* # any number of whitespace
|
|
/\*{1} # followed by '/' and precisely one asterisk
|
|
([^*].*) # followed by anything (group 1)
|
|
\*{1}/ # followed by one asterisk and a '/'
|
|
\s*$ # eventually followed by whitespace
|
|
'''
|
|
|
|
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
|
|
|
|
#
|
|
# format 2 documentation comment blocks look like the following:
|
|
#
|
|
# /************************************ (at least 2 asterisks)
|
|
# *
|
|
# *
|
|
# *
|
|
# *
|
|
# **/ (1 or more asterisks at the end)
|
|
#
|
|
# we define a few regular expressions here to detect them
|
|
#
|
|
start = r'''
|
|
\s* # any number of whitespace
|
|
/\*{2,} # followed by '/' and at least two asterisks
|
|
\s*$ # eventually followed by whitespace
|
|
'''
|
|
|
|
column = r'''
|
|
\s* # any number of whitespace
|
|
\*{1}(?!/) # followed by precisely one asterisk not followed by `/'
|
|
(.*) # then anything (group1)
|
|
'''
|
|
|
|
end = r'''
|
|
\s* # any number of whitespace
|
|
\*+/ # followed by at least one asterisk, then '/'
|
|
'''
|
|
|
|
re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
|
|
|
|
#
|
|
# the list of supported documentation block formats, we could add new ones
|
|
# relatively easily
|
|
#
|
|
re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
|
|
|
|
|
|
#
|
|
# the following regular expressions corresponds to markup tags
|
|
# within the documentation comment blocks. they're equivalent
|
|
# despite their different syntax
|
|
#
|
|
# notice how each markup tag _must_ begin a new line
|
|
#
|
|
re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' ) # <xxxx> format
|
|
re_markup_tag2 = re.compile( r'''\s*@(\w*):''' ) # @xxxx: format
|
|
|
|
#
|
|
# the list of supported markup tags, we could add new ones relatively
|
|
# easily
|
|
#
|
|
re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
|
|
|
|
#
|
|
# used to detect a cross-reference, after markup tags have been stripped
|
|
#
|
|
re_crossref = re.compile( r'@(\w*)(.*)' )
|
|
|
|
#
|
|
# used to detect italic and bold styles in paragraph text
|
|
#
|
|
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" ) # _italic_
|
|
re_bold = re.compile( r"\*(\w(\w|')*)\*(.*)" ) # *bold*
|
|
|
|
#
|
|
# used to detect the end of commented source lines
|
|
#
|
|
re_source_sep = re.compile( r'\s*/\*\s*\*/' )
|
|
|
|
#
|
|
# used to perform cross-reference within source output
|
|
#
|
|
re_source_crossref = re.compile( r'(\W*)(\w*)' )
|
|
|
|
#
|
|
# a list of reserved source keywords
|
|
#
|
|
re_source_keywords = re.compile( '''\\b ( typedef |
|
|
struct |
|
|
enum |
|
|
union |
|
|
const |
|
|
char |
|
|
int |
|
|
short |
|
|
long |
|
|
void |
|
|
signed |
|
|
unsigned |
|
|
\#include |
|
|
\#define |
|
|
\#undef |
|
|
\#if |
|
|
\#ifdef |
|
|
\#ifndef |
|
|
\#else |
|
|
\#endif ) \\b''', re.VERBOSE )
|
|
|
|
################################################################
|
|
##
|
|
## SOURCE BLOCK CLASS
|
|
##
|
|
## A SourceProcessor is in charge or reading a C source file
|
|
## and decomposing it into a series of different "SourceBlocks".
|
|
## each one of these blocks can be made of the following data:
|
|
##
|
|
## - A documentation comment block that starts with "/**" and
|
|
## whose exact format will be discussed later
|
|
##
|
|
## - normal sources lines, include comments
|
|
##
|
|
## the important fields in a text block are the following ones:
|
|
##
|
|
## self.lines : a list of text lines for the corresponding block
|
|
##
|
|
## self.content : for documentation comment blocks only, this is the
|
|
## block content that has been "unboxed" from its
|
|
## decoration. This is None for all other blocks
|
|
## (i.e. sources or ordinary comments with no starting
|
|
## markup tag)
|
|
##
|
|
class SourceBlock:
|
|
def __init__( self, processor, filename, lineno, lines ):
|
|
self.processor = processor
|
|
self.filename = filename
|
|
self.lineno = lineno
|
|
self.lines = lines[:]
|
|
self.format = processor.format
|
|
self.content = []
|
|
|
|
if self.format == None:
|
|
return
|
|
|
|
words = []
|
|
|
|
# extract comment lines
|
|
lines = []
|
|
|
|
for line0 in self.lines:
|
|
m = self.format.column.match( line0 )
|
|
if m:
|
|
lines.append( m.group(1) )
|
|
|
|
# now, look for a markup tag
|
|
for l in lines:
|
|
l = string.strip(l)
|
|
if len(l) > 0:
|
|
for tag in re_markup_tags:
|
|
if tag.match( l ):
|
|
self.content = lines
|
|
return
|
|
|
|
def location( self ):
|
|
return "(" + self.filename + ":" + repr(self.lineno) + ")"
|
|
|
|
|
|
# debugging only - not used in normal operations
|
|
def dump( self ):
|
|
|
|
if self.content:
|
|
print "{{{content start---"
|
|
for l in self.content:
|
|
print l
|
|
print "---content end}}}"
|
|
return
|
|
|
|
fmt = ""
|
|
if self.format:
|
|
fmt = repr(self.format.id) + " "
|
|
|
|
for line in self.lines:
|
|
print line
|
|
|
|
|
|
################################################################
|
|
##
|
|
## SOURCE PROCESSOR CLASS
|
|
##
|
|
## The SourceProcessor is in charge or reading a C source file
|
|
## and decomposing it into a series of different "SourceBlock"
|
|
## objects.
|
|
##
|
|
## each one of these blocks can be made of the following data:
|
|
##
|
|
## - A documentation comment block that starts with "/**" and
|
|
## whose exact format will be discussed later
|
|
##
|
|
## - normal sources lines, include comments
|
|
##
|
|
##
|
|
class SourceProcessor:
|
|
|
|
def __init__( self ):
|
|
"""initialize a source processor"""
|
|
self.blocks = []
|
|
self.filename = None
|
|
self.format = None
|
|
self.lines = []
|
|
|
|
def reset( self ):
|
|
"""reset a block processor, clean all its blocks"""
|
|
self.blocks = []
|
|
self.format = None
|
|
|
|
|
|
def parse_file( self, filename ):
|
|
"""parse a C source file, and adds its blocks to the processor's list"""
|
|
|
|
self.reset()
|
|
|
|
self.filename = filename
|
|
|
|
fileinput.close()
|
|
self.format = None
|
|
self.lineno = 0
|
|
self.lines = []
|
|
|
|
for line in fileinput.input( filename ):
|
|
|
|
# strip trailing newlines, important on Windows machines !!
|
|
if line[-1] == '\012':
|
|
line = line[0:-1]
|
|
|
|
if self.format == None:
|
|
self.process_normal_line( line )
|
|
|
|
else:
|
|
if self.format.end.match( line ):
|
|
# that's a normal block end, add it to lines and
|
|
# create a new block
|
|
self.lines.append( line )
|
|
self.add_block_lines()
|
|
|
|
elif self.format.column.match( line ):
|
|
# that's a normal column line, add it to 'lines'
|
|
self.lines.append( line )
|
|
|
|
else:
|
|
# humm.. this is an unexpected block end,
|
|
# create a new block, but don't process the line
|
|
self.add_block_lines()
|
|
|
|
# we need to process the line again
|
|
self.process_normal_line( line )
|
|
|
|
# record the last lines
|
|
self.add_block_lines()
|
|
|
|
|
|
|
|
def process_normal_line( self, line ):
|
|
"""process a normal line and check if it's the start of a new block"""
|
|
for f in re_source_block_formats:
|
|
if f.start.match( line ):
|
|
self.add_block_lines()
|
|
self.format = f
|
|
self.lineno = fileinput.filelineno()
|
|
|
|
self.lines.append( line )
|
|
|
|
|
|
|
|
def add_block_lines( self ):
|
|
"""add the current accumulated lines, and create a new block"""
|
|
if self.lines != []:
|
|
block = SourceBlock( self, self.filename, self.lineno, self.lines )
|
|
|
|
self.blocks.append( block )
|
|
self.format = None
|
|
self.lines = []
|
|
|
|
|
|
# debugging only, not used in normal operations
|
|
def dump( self ):
|
|
"""print all blocks in a processor"""
|
|
for b in self.blocks:
|
|
b.dump()
|
|
|
|
# eof
|