* src/tools/docmaker/*: adding new (more advanced) version of

the DocMaker tool. Python with regular expressions rocks..
2002-07-30 18:49:52 +00:00 · 2002-07-30 18:49:52 +00:00 · 5ea78be3a5
commit 5ea78be3a5
parent 1ff88d285d
7 changed files with 1780 additions and 0 deletions
--- a/3
+++ b/3
@ -4,6 +4,9 @@
        to demonstrate a "cleaner" API to support incremental font loading.
        comments appreciated...

+        * src/tools/docmaker/*: adding new (more advanced) version of
+        the DocMaker tool. Python with regular expressions rocks..
+
 2002-07-28  Werner Lemberg  <wl@gnu.org>

 	s/ft_memset/FT_MEM_SET/.
--- a/src/tools/docmaker/content.py
+++ b/src/tools/docmaker/content.py
@ -0,0 +1,547 @@
+#
+#  this file contains routines used to parse the content of documentation
+#  comment blocks and build more structured objects out of them
+#
+
+from sources import *
+from utils import *
+import string, re
+
+
+# this regular expression is used to detect code sequences. these
+# are simply code fragments embedded in '{' and '}' like in:
+#
+#  {
+#    x = y + z;
+#    if ( zookoo == 2 )
+#    {
+#      foobar();
+#    }
+#  }
+#
+# note that the indentation of the starting and ending braces must be
+# exactly the same. the code sequence can contain braces at greater
+# indentation
+#
+re_code_start = re.compile( r"(\s*){\s*$" )
+re_code_end   = re.compile( r"(\s*)}\s*$" )
+
+
+# this regular expression is used to isolate identifiers from
+# other text
+#
+re_identifier = re.compile( r'(\w*)' )
+
+
+#############################################################################
+#
+# The DocCode class is used to store source code lines.
+#
+#   'self.lines' contains a set of source code lines that will be dumped as
+#   HTML in a <PRE> tag.
+#
+#   The object is filled line by line by the parser; it strips the leading
+#   "margin" space from each input line before storing it in 'self.lines'.
+#
+class DocCode:
+
+    # 'margin' is the number of leading columns to remove and 'lines' is
+    # the list of raw text lines making up the code sequence.
+    def __init__( self, margin, lines ):
+        self.lines  = []
+        # always None for code; DocPara does the opposite (words set,
+        # lines None)
+        self.words  = None
+
+        # remove margin spaces; a line is only shortened when its first
+        # 'margin' characters are all whitespace, otherwise it is stored
+        # unchanged
+        for l in lines:
+            if string.strip( l[:margin] ) == "":
+                l = l[margin:]
+            self.lines.append( l )
+
+    # debugging helper: print every stored line preceded by 'prefix'.
+    # 'width' is accepted but not used by this method.
+    def dump( self, prefix = "", width=60 ):
+        for l in self.lines:
+            print prefix + l
+
+
+#############################################################################
+#
+# The DocPara class is used to store "normal" text paragraph.
+#
+#   'self.words' contains the list of words that make up the paragraph
+#
+class DocPara:
+
+    # 'lines' is the list of text lines of the paragraph; each one is
+    # stripped and split on whitespace, and the resulting words are
+    # accumulated in order in 'self.words'.
+    def __init__( self, lines ):
+        # always None for paragraphs; DocCode does the opposite
+        self.lines = None
+        self.words = []
+        for l in lines:
+            l = string.strip(l)
+            self.words.extend( string.split( l ) )
+
+    # debugging helper: print the paragraph re-wrapped so that the text
+    # after 'prefix' is at most 'width' columns wide, words being joined
+    # by single spaces.
+    def dump( self, prefix = "", width = 60 ):
+        cur  = ""  # current line
+        col  = 0   # current width
+
+        for word in self.words:
+            ln = len(word)
+            # account for the separating space when the line is not empty
+            if col > 0:
+                ln = ln+1
+
+            if col + ln > width:
+                # the word does not fit: flush the current line and start
+                # a new one with this word.  NOTE: when the very first
+                # word is longer than 'width', 'cur' is still empty here
+                # and a line containing only 'prefix' is printed.
+                print prefix + cur
+                cur = word
+                col = len(word)
+            else:
+                if col > 0:
+                    cur = cur + " "
+                cur = cur + word
+                col = col + ln
+
+        # flush the last, partially filled line
+        if col > 0:
+            print prefix + cur
+
+
+
+#############################################################################
+#
+#  The DocField class is used to store a list containing either DocPara or
+#  DocCode objects. Each DocField also has an optional "name" which is used
+#  when the object corresponds to a field or value definition
+#
+class DocField:
+
+    # build a field from its optional 'name' and its raw text 'lines'.
+    #
+    # the lines are scanned once and split into DocPara objects (normal
+    # text, separated by empty lines) and DocCode objects (text found
+    # between a line holding only '{' and a line holding only '}'); the
+    # resulting objects are stored in order in 'self.items'.
+    #
+    def __init__( self, name, lines ):
+
+        self.name  = name  # can be None for normal paragraphs/sources
+        self.items = []     # list of items
+
+        mode_none  = 0   # not inside a code sequence
+        mode_code  = 1   # parsing code sequences
+
+        margin     = -1  # current code sequence indentation
+        cur_lines  = []
+
+        # now analyze the markup lines to see if they contain paragraphs,
+        # code sequences or fields definitions
+        #
+        mode  = mode_none
+        for l in lines:
+
+            # are we parsing a code sequence ?
+            if mode == mode_code:
+
+                m = re_code_end.match( l )
+                if m and len(m.group(1)) <= margin:
+                    # that's it, we finished the code sequence; a closing
+                    # brace indented deeper than the opening one belongs
+                    # to the code itself and does not end the sequence
+                    code = DocCode( margin, cur_lines )
+                    self.items.append( code )
+                    margin    = -1
+                    cur_lines = []
+                    mode      = mode_none
+                else:
+                    # nope, continue the code sequence
+                    cur_lines.append( l[margin:] )
+            else:
+                # start of code sequence ?
+                m = re_code_start.match( l )
+                if m:
+                    # save current lines
+                    if cur_lines:
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+
+                    # switch to code extraction mode
+                    margin = len(m.group(1))
+                    mode   = mode_code
+
+                else:
+                    if not string.split( l ) and cur_lines:
+                        # if the line is empty, we end the current paragraph,
+                        # if any
+                        para = DocPara( cur_lines )
+                        self.items.append( para )
+                        cur_lines = []
+                    else:
+                        # otherwise, simply add the line to the current
+                        # paragraph (an empty line seen while no paragraph
+                        # is open is added like any other line)
+                        cur_lines.append( l )
+
+        if mode == mode_code:
+            # unexpected end of code sequence
+            code = DocCode( margin, cur_lines )
+            self.items.append( code )
+
+        elif cur_lines:
+            para = DocPara( cur_lines )
+            self.items.append( para )
+
+    # debugging helper: print the field name (when there is one) followed
+    # by every item, items being separated by an empty line.
+    #
+    def dump( self, prefix = "" ):
+        # the name is stored in 'self.name' by __init__; reading the
+        # never-assigned 'self.field' here raised AttributeError
+        if self.name:
+            print prefix + self.name + " ::"
+            prefix = prefix + "----"
+
+        first = 1
+        for p in self.items:
+            if not first:
+                print ""
+            p.dump( prefix )
+            first = 0
+
+
+# this regular expression is used to detect field definitions
+#
+re_field  = re.compile( r"\s*(\w*)\s*::" )
+
+
+
+# A DocMarkup holds the content of one markup section: its tag name
+# (stored in lower case) and the list of DocField objects parsed from
+# the lines of the section.
+#
+class DocMarkup:
+
+    # 'tag' is the markup tag name and 'lines' the text lines belonging
+    # to it.  Lines are grouped into fields: each line matching
+    # 're_field' ("name ::") starts a new named field, lines seen before
+    # the first such line form a field whose name is None.
+    def __init__( self, tag, lines ):
+        self.tag       = string.lower(tag)
+        self.fields    = []
+
+        cur_lines = []
+        field     = None
+        mode      = 0    # not used below
+
+        for l in lines:
+            m = re_field.match( l )
+            if m:
+                # we detected the start of a new field definition
+
+                # first, save the current one
+                if cur_lines:
+                    f = DocField( field, cur_lines )
+                    self.fields.append( f )
+                    cur_lines = []
+                    field     = None
+
+                field     = m.group(1)   # record field name
+                # blank out the "name ::" part with spaces so that the
+                # rest of the line keeps its original column
+                ln        = len(m.group(0))
+                l         = " "*ln + l[ln:]
+                cur_lines = [ l ]
+            else:
+                cur_lines.append( l )
+
+        # flush the last field being collected
+        if field or cur_lines:
+            f = DocField( field, cur_lines )
+            self.fields.append( f )
+
+    # return the first word of the first item of the first field, or
+    # None when any of these is missing (every exception is swallowed)
+    def get_name( self ):
+        try:
+            return self.fields[0].items[0].words[0]
+
+        except:
+            return None
+
+    # debugging helper: print the markup as "<tag>" ... "</tag>", the
+    # tags being indented by 'margin' spaces and the fields by a fixed
+    # two-space prefix
+    def dump( self, margin ):
+        print " "*margin + "<" + self.tag + ">"
+        for f in self.fields:
+            f.dump( "  " )
+        print " "*margin + "</" + self.tag + ">"
+
+
+
+
+# A DocChapter groups several sections under a common title.
+#
+#   'self.block'    is the block the chapter was built from, or None
+#   'self.sections' starts empty; ContentProcessor.finish fills it
+#   'self.title'    is a list of words
+#   'self.order'    is the list of words found in the "sections" markup
+#
+class DocChapter:
+
+    # 'block' is the block defining the chapter; when it is None a
+    # catch-all chapter named "Other" and titled "Miscellaneous" is
+    # created instead
+    def __init__( self, block ):
+        self.block    = block
+        self.sections = []
+        if block:
+            self.name     = block.name
+            self.title    = block.get_markup_words( "title" )
+            self.order    = block.get_markup_words( "sections" )
+        else:
+            self.name     = "Other"
+            self.title    = string.split( "Miscellaneous" )
+            self.order    = []
+
+
+
+# A DocSection collects the blocks that belong to one documentation
+# section, together with the blocks that describe the section itself.
+#
+class DocSection:
+
+    def __init__( self, name = "Other" ):
+        self.name        = name
+        self.blocks      = {}  # block name -> block
+        self.block_names = []  # ordered block names in section
+        self.defs        = []  # blocks added through add_def
+        self.abstract    = ""
+        self.description = ""
+        self.order       = []
+        self.title       = "ERROR"  # replaced by process() when a title is found
+        self.chapter     = None     # set by ContentProcessor.finish
+
+    # record a block that defines/describes this section
+    def add_def( self, block ):
+        self.defs.append( block )
+
+    # record a block that belongs to this section; a later block with
+    # the same name replaces the earlier one in 'self.blocks' while its
+    # name is appended to 'self.block_names' again
+    def add_block( self, block ):
+        self.block_names.append( block.name )
+        self.blocks[ block.name ] = block
+
+    # fill title, abstract, description and order from the first block
+    # in 'self.defs' that has a non-empty "Title" markup.  after this
+    # call 'title' is a string, 'abstract' and 'order' are word lists
+    # and 'description' is whatever get_markup_items returns.
+    def process( self ):
+        # lookup one block that contains a valid section description
+        for block in self.defs:
+            title = block.get_markup_text( "Title" )
+            if title:
+                self.title       = title
+                self.abstract    = block.get_markup_words( "abstract" )
+                self.description = block.get_markup_items( "description" )
+                self.order       = block.get_markup_words( "order" )
+                return
+                
+    # re-order the block names with sort_order_list, which is not defined
+    # in this file (presumably it comes from 'utils' - TODO confirm)
+    def reorder( self ):
+        
+        self.block_names = sort_order_list( self.block_names, self.order )
+
+
+# The ContentProcessor turns the documentation blocks found by a source
+# processor into DocBlock objects, and keeps track of the sections and
+# chapters they define.
+#
+class ContentProcessor:
+
+    def __init__( self ):
+        """initialize a block content processor"""
+        self.reset()
+        
+        self.sections = {}    # dictionary of documentation sections
+        self.section  = None  # current documentation section
+
+        self.chapters = []        # list of chapters
+
+    def set_section( self, section_name ):
+        """set current section during parsing"""
+        # the section is created the first time its name is seen
+        if not self.sections.has_key( section_name ):
+            section = DocSection( section_name )
+            self.sections[ section_name ] = section
+            self.section                  = section
+        else:
+            self.section = self.sections[ section_name ]
+
+    # create a DocChapter from 'block' and append it to the chapter list
+    def add_chapter( self, block ):
+        chapter = DocChapter( block )
+        self.chapters.append( chapter )
+
+
+    def reset( self ):
+        """reset the content processor for a new block"""
+        self.markups      = []
+        self.markup       = None
+        self.markup_lines = []
+
+    def add_markup( self ):
+        """add a new markup section"""
+        # nothing is added unless a tag is pending AND it has lines
+        if self.markup and self.markup_lines:
+
+            # get rid of last line of markup if it's empty
+            marks = self.markup_lines
+            if len(marks) > 0 and not string.strip(marks[-1]):
+                self.markup_lines = marks[:-1]
+
+            m = DocMarkup( self.markup, self.markup_lines )
+
+            self.markups.append( m )
+
+            self.markup       = None
+            self.markup_lines = []
+
+
+    def process_content( self, content ):
+        """process a block content and return a list of DocMarkup objects
+           corresponding to it"""
+        # NOTE: the two locals below are never read; the state actually
+        # used is self.markup / self.markup_lines
+        markup       = None
+        markup_lines = []
+        first        = 1   # stays 1 until the first markup tag is seen
+
+        for line in content:
+            found = None
+            # 're_markup_tags' is not defined in this file; it is expected
+            # to come from the 'from sources import *' at the top
+            for t in re_markup_tags:
+                m = t.match( line )
+                if m:
+                    found  = string.lower(m.group(1))
+                    prefix = len(m.group(0))
+                    line   = " "*prefix + line[prefix:]   # remove markup from line
+                    break
+
+            # is it the start of a new markup section ?
+            if found:
+                first = 0
+                self.add_markup()  # add current markup content
+                self.markup = found
+                # keep the rest of the tag line only when it holds text
+                if len(string.strip( line )) > 0:
+                    self.markup_lines.append( line )
+            elif first == 0:
+                # lines located before the first tag are dropped
+                self.markup_lines.append( line )
+
+        self.add_markup()
+
+        return self.markups
+
+
+    # walk all blocks of 'source_processor' and build a DocBlock for each
+    # block that has content, passing it the run of content-less blocks
+    # that immediately follows it.  the DocBlock registers itself with
+    # this processor from its constructor, which is why the 'doc_block'
+    # local is not used afterwards.
+    def  parse_sources( self, source_processor ):
+        blocks = source_processor.blocks
+        count  = len(blocks)
+        for n in range(count):
+            
+            source = blocks[n]
+            if source.content:
+                # this is a documentation comment, we need to catch
+                # all following normal blocks in the "follow" list
+                #
+                follow = []
+                m = n+1
+                while m < count and not blocks[m].content:
+                    follow.append( blocks[m] )
+                    m = m+1
+
+                doc_block = DocBlock( source, follow, self )
+    
+    
+    # finalize sections and chapters once every source has been parsed
+    def  finish( self ):
+
+        # process all sections to extract their abstract, description
+        # and ordered list of items
+        #
+        for sec in self.sections.values():
+            sec.process()
+
+        # process chapters to check that all sections are correctly
+        # listed there
+        for chap in self.chapters:
+            for sec in chap.order:
+                if self.sections.has_key(sec):
+                    section = self.sections[ sec ]
+                    section.chapter = chap
+                    section.reorder()
+                    chap.sections.append( section )
+                else:
+                    # 'sys' is not imported by this file; it is expected
+                    # to be provided by 'from sources import *'
+                    sys.stderr.write( "WARNING: chapter '" +
+                        chap.name + "' in " + chap.block.location() + \
+                        " lists unknown section '" + sec + "'\n" )
+
+        # check that all sections are in a chapter
+        #
+        others = []
+        for sec in self.sections.values():
+            if not sec.chapter:
+                others.append(sec)
+
+        # create a new special chapter for all remaining sections
+        # when necessary
+        #
+        if others:
+            chap = DocChapter( None )
+            chap.sections = others
+            self.chapters.append( chap )
+            
+
+
+# A DocBlock is built from one documentation comment ('source') and the
+# normal source blocks that follow it ('follow').  Its constructor parses
+# the comment into markups, derives the block type and name, registers
+# the block with the content processor and collects the source lines
+# that the comment documents.
+#
+class DocBlock:
+
+    def __init__( self, source, follow, processor ):
+        
+        processor.reset()
+
+        self.source    = source
+        self.code      = []
+        self.type      = "ERRTYPE"   # kept when there is no markup at all
+        self.name      = "ERRNAME"   # kept when no name can be extracted
+        self.section   = processor.section
+        self.markups   = processor.process_content( source.content )
+
+        # compute block type from first markup tag
+        try:
+            self.type = self.markups[0].tag
+        except:
+            pass
+        
+            
+        # compute block name from first markup paragraph
+        try:
+            markup = self.markups[0]
+            para   = markup.fields[0].items[0]
+            name   = para.words[0]
+            # keep only the leading identifier characters of the word
+            m = re_identifier.match( name )
+            if m:
+                name = m.group(1)
+            self.name = name
+        except:
+            pass
+
+        # detect new section starts
+        if self.type == "section":
+            processor.set_section( self.name )
+            processor.section.add_def( self )
+
+        # detect new chapter
+        elif self.type == "chapter":
+            processor.add_chapter( self )
+
+        else:
+            # NOTE(review): 'processor.section' is None until a "section"
+            # block has been seen, in which case this call fails - confirm
+            # that inputs always start with a section block
+            processor.section.add_block( self )
+
+        # now, compute the source lines relevant to this documentation
+        # block. We keep normal comments in for obvious reasons (??)
+        source = []
+        for b in follow:
+            # stop at the first following block that has a format
+            if b.format:
+                break
+            for l in b.lines:
+                # we use "/* */" as a separator
+                # (this only leaves the inner loop; the next block in
+                # 'follow' is still examined)
+                if re_source_sep.match( l ):
+                    break
+                source.append( l )
+
+        # now strip the leading and trailing empty lines from the sources
+        start = 0
+        end   = len( source )-1
+        
+        while start < end and not string.strip( source[start] ):
+            start = start + 1
+
+        while start < end and not string.strip( source[end] ):
+            end = end - 1
+
+        # both loops stop when start == end, so a single remaining line
+        # is kept even when it is empty
+        source = source[start:end+1]
+
+        self.code = source
+
+
+    # return the location string of the originating source block
+    def location( self ):
+        return self.source.location()
+
+
+
+    def get_markup( self, tag_name ):
+        """return the DocMarkup corresponding to a given tag in a block"""
+        # the comparison is case-insensitive on 'tag_name'; markup tags
+        # are already stored in lower case
+        for m in self.markups:
+            if m.tag == string.lower(tag_name):
+                return m
+        return None
+
+
+    def get_markup_name( self, tag_name ):
+        """return the name of a given primary markup in a block"""
+        try:
+            m = self.get_markup( tag_name )
+            return m.get_name()
+        except:
+            return None
+
+
+    # return the word list of the first item of the first field of the
+    # given markup, or an empty list when anything is missing
+    def get_markup_words( self, tag_name ):
+        try:
+            m = self.get_markup( tag_name )
+            return m.fields[0].items[0].words
+        except:
+            return []
+
+
+    # same as get_markup_words, but joined into a single string
+    def get_markup_text( self, tag_name ):
+        result = self.get_markup_words( tag_name )
+        return string.join( result )
+
+
+    # return the item list of the first field of the given markup, or
+    # None when the markup or its first field is missing
+    def get_markup_items( self, tag_name ):
+        try:
+            m = self.get_markup( tag_name )
+            return m.fields[0].items
+        except:
+            return None
--- a/src/tools/docmaker/docmaker.py
+++ b/src/tools/docmaker/docmaker.py
@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+#  DocMaker 0.2 (c) 2002 David Turner <david@freetype.org>
+#
+# This program is a re-write of the original DocMaker tool used
+# to generate the API Reference of the FreeType font engine
+# by converting in-source comments into structured HTML
+#
+# This new version is capable of outputting XML data, and
+# accepts more liberal formatting options
+#
+# It also uses regular expression matching and substitution
+# to speed things significantly
+#
+
+from sources import *
+from content import *
+from tohtml  import *
+
+import sys, os, time, string, glob, getopt
+
+
+def file_exists( pathname ):
+    """checks that a given file exists
+
+    returns 1 when 'pathname' can be opened for reading.  otherwise a
+    message is written to sys.stderr and None is returned, which lets
+    the function be used directly as a filter() predicate.
+    """
+    try:
+        handle = open( pathname, "r" )
+    except IOError:
+        # the message must go to sys.stderr; 'sys.err' does not exist and
+        # turned every missing file into an AttributeError
+        sys.stderr.write( pathname + " couldn't be accessed\n" )
+        return None
+
+    handle.close()
+    return 1
+
+
+def make_file_list( args = None ):
+    """builds a list of input files from command-line arguments"""
+
+    # 'args' is a list of pathnames, possibly containing '*' wildcards;
+    # when it is None or empty, sys.argv[1:] is used instead.
+    #
+    # returns None when the expanded list is empty, otherwise the list
+    # of pathnames for which file_exists succeeded.
+
+    file_list = []
+    # sys.stderr.write( repr( sys.argv[1 :] ) + '\n' )
+
+    if not args:
+        args = sys.argv[1 :]
+
+    for pathname in args:
+        # only names containing '*' are expanded with glob
+        if string.find( pathname, '*' ) >= 0:
+            newpath = glob.glob( pathname )
+            newpath.sort()  # sort files -- this is important because
+                            # of the order of files
+        else:
+            newpath = [pathname]
+            
+        # append 'newpath' at the end of the list
+        last = len( file_list )
+        file_list[last : last] = newpath
+
+    if len( file_list ) == 0:
+        file_list = None
+    else:
+        # now filter the file list to remove non-existing ones
+        file_list = filter( file_exists, file_list )
+    
+    return file_list
+
+
+
+def usage():
+    """prints the command-line help on standard output"""
+    print "\nDocMaker 0.2 Usage information\n"
+    print "  docmaker [options] file1 [ file2 ... ]\n"
+    print "using the following options:\n"
+    print "  -h : print this page"
+    
+
+def main( argv ):
+    """main program loop
+
+    'argv' is the complete argument vector, program name included
+    (normally sys.argv).  the program exits with status 2 on an invalid
+    option, 0 after printing the help requested with -h/--help, and 1
+    when no input file is given.
+    """
+
+    try:
+        opts, args = getopt.getopt( argv[1:],"h", [ "help" ] )
+
+    except getopt.GetoptError:
+        usage()
+        sys.exit( 2 )
+
+    # process options first, so that a lone '-h' is a successful request
+    # for help rather than a "missing file" error
+    #
+    for opt in opts:
+        if opt[0] in ( "-h", "--help" ):
+            usage()
+            sys.exit( 0 )
+
+    if args == []:
+        usage()
+        sys.exit( 1 )
+
+    # create context and processor
+    source_processor  = SourceProcessor()
+    content_processor = ContentProcessor()
+
+    # retrieve the list of files to process.  the arguments left over by
+    # getopt are passed explicitly: calling make_file_list() without them
+    # made it re-read sys.argv, options included, and ignore 'argv'.
+    # make_file_list returns None when wildcards match nothing, hence the
+    # fallback to an empty list
+    file_list = make_file_list( args ) or []
+    for filename in file_list:
+        source_processor.parse_file( filename )
+        content_processor.parse_sources( source_processor )
+        
+    # process sections
+    content_processor.finish()
+
+    formatter = HtmlFormatter( content_processor, "Example", "zz" )
+
+    formatter.toc_dump()
+    formatter.index_dump()
+    formatter.section_dump_all()
+
+
+# if called from the command line
+#
+if __name__ == '__main__':
+    main( sys.argv )
+
+
+# eof
--- a/src/tools/docmaker/formatter.py
+++ b/src/tools/docmaker/formatter.py
@ -0,0 +1,194 @@
+from sources import *
+from content import *
+from utils   import *
+
+# Formatter is the base class of output generators.  It indexes all the
+# blocks of a content processor by name and provides three drivers
+# (toc_dump, index_dump, section_dump) which walk the documentation and
+# call "enter"/"exit" hooks in a fixed order.  Every hook is a no-op
+# here; they are meant to be overridden.
+#
+class Formatter:
+
+    # 'processor' is the content processor whose chapters and sections
+    # are going to be formatted
+    def __init__( self, processor ):
+
+        self.processor   = processor
+        self.identifiers = {}   # identifier name -> defining block
+        self.chapters    = processor.chapters
+        self.sections    = processor.sections.values()
+        self.block_index = []
+
+        # store all blocks in a dictionary
+        # (the dictionary is 'self.identifiers'; 'self.blocks' stays an
+        # empty list in this class)
+        self.blocks      = []
+        for section in self.sections:
+            for block in section.blocks.values():
+                self.add_identifier( block.name, block )
+                    
+                # add enumeration values to the index, since this is useful
+                for markup in block.markups:
+                    if markup.tag == 'values':
+                        for field in markup.fields:
+                            self.add_identifier( field.name, block )
+
+
+        # sorted list of all known identifiers; 'index_sort' is not
+        # defined in this file (presumably it comes from 'utils' - TODO
+        # confirm)
+        self.block_index = self.identifiers.keys()
+        self.block_index.sort( index_sort )
+
+
+    # register 'block' under 'name'; the first definition wins and a
+    # warning is written to stderr for every later duplicate
+    def add_identifier( self, name, block ):
+        if self.identifiers.has_key( name ):
+            # duplicate name !!
+            sys.stderr.write( \
+               "WARNING: duplicate definition for '" + name + "' in " + \
+               block.location() + ", previous definition in " +         \
+               self.identifiers[ name ].location() + "\n" )
+        else:
+            self.identifiers[name] = block
+              
+
+    #
+    #  Formatting the table of contents
+    #
+    #  the hooks below are called by toc_dump in this order:
+    #  toc_enter, then for each chapter toc_chapter_enter, for each of
+    #  its sections toc_section_enter / toc_section_exit, then
+    #  toc_chapter_exit, and finally toc_index and toc_exit
+    #
+
+    def  toc_enter( self ):
+        pass
+    
+    def  toc_chapter_enter( self, chapter ):
+        pass
+    
+    def  toc_section_enter( self, section ):
+        pass
+        
+    def  toc_section_exit( self, section ):
+        pass
+        
+    def  toc_chapter_exit( self, chapter ):
+        pass
+
+    def  toc_index( self, index_filename ):
+        pass
+    
+    def  toc_exit( self ):
+        pass
+
+    # drive the table-of-contents hooks.  when 'toc_filename' is given,
+    # open_output / close_output (not defined in this file) are called
+    # around the walk; 'index_filename' is only forwarded to toc_index
+    def  toc_dump( self, toc_filename = None, index_filename = None ):
+        
+        output = None
+        if toc_filename:
+            output = open_output( toc_filename )
+        
+        self.toc_enter()
+    
+        for chap in self.processor.chapters:
+    
+            self.toc_chapter_enter( chap )
+    
+            for section in chap.sections:
+                self.toc_section_enter( section )
+                self.toc_section_exit( section )
+    
+            self.toc_chapter_exit ( chap )
+    
+        self.toc_index( index_filename )
+    
+        self.toc_exit()
+
+        if output:
+            close_output( output )
+    
+    #
+    #  Formatting the index
+    #
+    #  index_dump calls index_enter, then index_name_enter /
+    #  index_name_exit for each name of 'self.block_index', then
+    #  index_exit
+    #
+
+    def  index_enter( self ):
+        pass
+
+    def  index_name_enter( self, name ):
+        pass
+
+    def  index_name_exit( self, name ):
+        pass
+
+    def  index_exit( self ):
+        pass
+
+    # drive the index hooks; 'index_filename' is handled like
+    # 'toc_filename' in toc_dump
+    def  index_dump( self, index_filename = None ):
+        
+        output = None
+        if index_filename:
+            output = open_output( index_filename )
+
+        self.index_enter()
+
+        for name in self.block_index:
+            self.index_name_enter( name )
+            self.index_name_exit ( name )
+
+        self.index_exit()
+     
+        if output:
+            close_output( output )
+     
+    #
+    #  Formatting a section
+    #
+    #  section_dump nests the hooks as section > block > markup > field
+    #
+    def  section_enter( self, section ):
+        pass
+    
+    def  block_enter( self, block ):
+        pass
+    
+    def  markup_enter( self, markup, block = None ):
+        pass
+    
+    def  field_enter( self, field, markup = None, block = None ):
+        pass
+        
+    def  field_exit( self, field, markup = None, block = None ):
+        pass
+    
+    def  markup_exit( self, markup, block = None ):
+        pass
+        
+    def  block_exit( self, block ):
+        pass
+
+    def  section_exit( self, section ):
+        pass
+
+
+    # drive the section hooks for one section; 'section_filename' is
+    # handled like 'toc_filename' in toc_dump
+    def  section_dump( self, section, section_filename = None ):
+        
+        output = None
+        if section_filename:
+            output = open_output( section_filename )
+        
+        self.section_enter( section )
+
+        for name in section.block_names:
+            # blocks are looked up through the global identifier table,
+            # not through 'section.blocks'
+            block = self.identifiers[ name ]
+            self.block_enter( block )
+
+            for markup in block.markups[1:]:   # always ignore first markup !!
+                self.markup_enter( markup, block )
+
+                for field in markup.fields:
+                    self.field_enter( field, markup, block )
+
+                    self.field_exit ( field, markup, block )
+
+                self.markup_exit( markup, block )
+
+            self.block_exit( block )
+
+        self.section_exit ( section )
+
+        if output:
+            close_output( output )
+
+
+    # call section_dump on every section, without any output file name
+    def section_dump_all( self ):
+        for section in self.sections:
+            self.section_dump( section )
+
+    #
+    #  Formatting a block
+    #
+
+
+
+
--- a/src/tools/docmaker/sources.py
+++ b/src/tools/docmaker/sources.py
@ -0,0 +1,355 @@
+#
+# this file contains definitions of classes needed to decompose
+# C sources files into a series of multi-line "blocks". There are
+# two kinds of blocks:
+#
+#   - normal blocks, which contain source code or ordinary comments
+#
+#   - documentation blocks, which have restricted formatting, and
+#     whose text always start with a documentation markup tag like
+#     "<Function>", "<Type>", etc..
+#
+# the routines used to process the content of documentation blocks
+# are not contained here, but in "content.py"
+#
+# the classes and methods found here only deal with text parsing
+# and basic documentation block extraction
+#
+import fileinput, re, sys, os, string
+
+
+
+
+
+
+################################################################
+##
+##  BLOCK FORMAT PATTERN
+##
+##   A simple class containing compiled regular expressions used
+##   to detect potential documentation format block comments within
+##   C source code
+##
+##   note that the 'column' pattern must contain a group that will
+##   be used to "unbox" the content of documentation comment blocks
+##
+class SourceBlockFormat:
+
+    # 'id' is an identifier for the format (1 and 2 are used below);
+    # 'start', 'column' and 'end' are regular expression sources, all
+    # compiled with re.VERBOSE so that they can contain whitespace and
+    # '#' comments
+    def __init__( self, id, start, column, end ):
+        """create a block pattern, used to recognize special documentation blocks"""
+
+        self.id     = id
+        self.start  = re.compile( start, re.VERBOSE )
+        self.column = re.compile( column, re.VERBOSE )
+        self.end    = re.compile( end, re.VERBOSE )
+
+
+
+#
+# format 1 documentation comment blocks look like the following:
+#
+#    /************************************/
+#    /*                                  */
+#    /*                                  */
+#    /*                                  */
+#    /************************************/
+#
+# we define a few regular expressions here to detect them
+#
+
+start = r'''
+  \s*       # any number of whitespace
+  /\*{2,}/  # followed by '/' and at least two asterisks then '/'
+  \s*$      # eventually followed by whitespace
+'''
+
+column = r'''
+  \s*      # any number of whitespace
+  /\*{1}   # followed by '/' and precisely one asterisk
+  ([^*].*) # followed by anything (group 1)
+  \*{1}/   # followed by one asterisk and a '/'
+  \s*$     # enventually followed by whitespace
+'''
+
+re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
+
+#
+# format 2 documentation comment blocks look like the following:
+#
+#    /************************************
+#     *
+#     *                                                                    
+#     *                                                                    
+#     *                                                                    
+#     **/       (1 or more asterisks at the end)
+#
+# we define a few regular expressions here to detect them
+#
+start = r'''
+  \s*     # any number of whitespace
+  /\*{2,} # followed by '/' and at least two asterisks
+  \s*$    # eventually followed by whitespace
+'''
+
+column = r'''
+  \s*         # any number of whitespace
+  \*{1}       # followed by precisely one asterisk
+  (.*)        # followed by anything (group1)
+'''
+
+end = r'''
+  \s*     # any number of whitespace
+  \*+/    # followed by at least on asterisk, then '/'
+'''
+
+re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
+
+#
+# the list of supported documentation block formats, we could add new ones
+# relatively easily
+#
+re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
+
+
+#
+# the following regular expressions corresponds to markup tags
+# within the documentation comment blocks. they're equivalent
+# despite their different syntax
+#
+# notice how each markup tag _must_ begin a new line
+#
+re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
+re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format
+
+#
+# the list of supported markup tags, we could add new ones relatively
+# easily
+#
+re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
+
+#
+# used to detect a cross-reference, after markup tags have been stripped
+#
+re_crossref = re.compile( r'@(\w*)' )
+
+#
+# used to detect italic and bold styles in paragraph text
+#
+re_italic = re.compile( r'_(\w+)_' )
+re_bold   = re.compile( r'\*(\w+)\*' )
+
+#
+# used to detect the end of commented source lines
+#
+re_source_sep = re.compile( r'\s*/\*\s*\*/' )
+
+#
+# used to perform cross-reference within source output
+#
+re_source_crossref = re.compile( r'(\W*)(\w*)' )
+
+#
+# a list of reserved source keywords
+#
+re_source_keywords = re.compile( '''( typedef | 
+                                       struct |
+                                       enum   |
+                                       union  |
+                                       const  |
+                                       char   |
+                                       int    |
+                                       short  |
+                                       long   |
+                                       void   |
+                                       signed |
+                                       unsigned |
+                                       \#include |
+                                       \#define  |
+                                       \#undef   |
+                                       \#if      |
+                                       \#ifdef   |
+                                       \#ifndef  |
+                                       \#else    |
+                                       \#endif   )''', re.VERBOSE )
+
+################################################################
+##
+##  SOURCE BLOCK CLASS
+##
+##   A SourceProcessor is in charge of reading a C source file
+##   and decomposing it into a series of different "SourceBlocks".
+##   each one of these blocks can be made of the following data:
+##
+##   - A documentation comment block that starts with "/**" and
+##     whose exact format will be discussed later
+##
+##   - normal sources lines, include comments
+##
+##   the important fields in a text block are the following ones:
+##
+##     self.lines   : a list of text lines for the corresponding block
+##
+##     self.content : for documentation comment blocks only, this is the
+##                    block content that has been "unboxed" from its
+##                    decoration. This is an empty list for all other
+##                    blocks (i.e. sources or ordinary comments with no
+##                    starting markup tag)
+##
+class SourceBlock:
+    # 'processor' is the source processor creating the block (its
+    # current 'format' is copied), 'filename' and 'lineno' locate the
+    # block and 'lines' is the list of its text lines
+    def __init__( self, processor, filename, lineno, lines ):
+        self.processor = processor
+        self.filename  = filename
+        self.lineno    = lineno
+        self.lines     = lines
+        self.format    = processor.format
+        self.content   = []
+
+        # no format means a normal block: 'content' stays empty
+        if self.format == None:
+            return
+
+        words = []   # not used below
+
+        # extract comment lines
+        # (the first line of the block is skipped; lines that do not
+        # match the format's column pattern are dropped)
+        lines = []
+
+        for line0 in self.lines[1:]:
+            m = self.format.column.match( line0 )
+            if m:
+                lines.append( m.group(1) )
+
+        # now, look for a markup tag
+        # only the first non-empty line is examined: the block gets a
+        # content when that line starts with a markup tag, and the
+        # search stops there in both cases
+        for l in lines:
+            l = string.strip(l)
+            if len(l) > 0:
+                for tag in re_markup_tags:
+                    if tag.match( l ):
+                        self.content = lines
+                return
+
+    # return the block position formatted as "(filename:lineno)"
+    def location( self ):
+        return "(" + self.filename + ":" + repr(self.lineno) + ")"
+
+
+    # debugging only - not used in normal operations
+    def dump( self ):
+        
+        # documentation blocks print their unboxed content only
+        if self.content:
+            print "{{{content start---"
+            for l in self.content:
+                print l
+            print "---content end}}}"
+            return
+            
+        # 'fmt' is computed but never printed
+        fmt = ""
+        if self.format:
+            fmt = repr(self.format.id) + " "
+        
+        for line in self.lines:
+            print line
+
+
+################################################################
+##
+##  SOURCE PROCESSOR CLASS
+##
+##   The SourceProcessor is in charge of reading a C source file
+##   and decomposing it into a series of different "SourceBlock"
+##   objects.
+##
+##   each one of these blocks can be made of the following data:
+##
+##   - A documentation comment block that starts with "/**" and
+##     whose exact format will be discussed later
+##
+##   - normal source lines, including comments
+##
+##
+class SourceProcessor:
+
+    def  __init__( self ):
+        """initialize a source processor"""
+        self.blocks   = []
+        self.filename = None
+        self.format   = None
+        self.lines    = []
+
+    def  reset( self ):
+        """reset a block processor, clean all its blocks"""
+        self.blocks = []
+        self.format = None
+
+
+    def  parse_file( self, filename ):
+        """parse a C source file, and adds its blocks to the processor's list"""
+        
+        self.reset()
+        
+        self.filename = filename
+        
+        fileinput.close()
+        self.format    = None
+        self.lineno    = 0
+        self.lines     = []
+
+        for line in fileinput.input( filename ):
+            
+            # strip trailing newlines, important on Windows machines !!
+            if  line[-1] == '\012':
+                line = line[0:-1]
+    
+            if self.format == None:
+                self.process_normal_line( line )
+
+            else:
+                if self.format.end.match( line ):
+                    # that's a normal block end, add it to lines and
+                    # create a new block
+                    self.lines.append( line )
+                    self.add_block_lines()
+                    
+                elif self.format.column.match( line ):
+                    # that's a normal column line, add it to 'lines'
+                    self.lines.append( line )
+                        
+                else:
+                    # humm.. this is an unexcepted block end,
+                    # create a new block, but don't process the line
+                    self.add_block_lines()
+                    
+                    # we need to process the line again
+                    self.process_normal_line( line )
+                                
+        # record the last lines
+        self.add_block_lines()
+
+        
+
+    def process_normal_line( self, line ):
+        """process a normal line and check if it's the start of a new block"""
+        for f in re_source_block_formats:
+          if f.start.match( line ):
+            self.add_block_lines()
+            self.format = f
+            self.lineno = fileinput.filelineno()
+
+        self.lines.append( line )
+
+    
+
+    def add_block_lines( self ):
+        """add the current accumulated lines, and create a new block"""
+        if self.lines != []:
+            block = SourceBlock( self, self.filename, self.lineno, self.lines )
+            
+            self.blocks.append( block )
+            self.format = None
+            self.lines  = []
+
+    
+    # debugging only, not used in normal operations
+    def dump( self ):
+        """print all blocks in a processor"""
+        for b in self.blocks:
+            b.dump()
+
+# eof
--- a/src/tools/docmaker/tohtml.py
+++ b/src/tools/docmaker/tohtml.py
@ -0,0 +1,475 @@
+from sources import *
+from content import *
+from formatter import *
+import time
+
+# The following defines the HTML header used by all generated pages.
+#
+html_header_1 = """\
+<html>
+<header>
+<title>"""
+
+html_header_2= """ API Reference</title>
+<basefont face="Verdana,Geneva,Arial,Helvetica">
+<style content="text/css">
+  P { text-align=justify }
+  H1 { text-align=center }
+  LI { text-align=justify }
+</style>
+</header>
+<body text=#000000
+      bgcolor=#FFFFFF
+      link=#0000EF
+      vlink=#51188E
+      alink=#FF0000>
+<center><h1>"""
+
+html_header_3=""" API Reference</h1></center>
+"""
+
+
+
+# The HTML footer used by all generated pages.
+#
+html_footer = """\
+</body>
+</html>"""
+
+# The header and footer used for each section.
+#
+section_title_header = "<center><h1>"
+section_title_footer = "</h1></center>"
+
+# The header and footer used for code segments.
+#
+code_header = "<font color=blue><pre>"
+code_footer = "</pre></font>"
+
+# Paragraph header and footer.
+#
+para_header = "<p>"
+para_footer = "</p>"
+
+# Block header and footer.
+#
+block_header = "<center><table width=75%><tr><td>"
+block_footer = "</td></tr></table><hr width=75%></center>"
+
+# Description header/footer.
+#
+description_header = "<center><table width=87%><tr><td>"
+description_footer = "</td></tr></table></center><br>"
+
+# Marker header/inter/footer combination.
+#
+marker_header = "<center><table width=87% cellpadding=5><tr bgcolor=#EEEEFF><td><em><b>"
+marker_inter  = "</b></em></td></tr><tr><td>"
+marker_footer = "</td></tr></table></center>"
+
+# Source code extracts header/footer.
+#
+source_header = "<center><table width=87%><tr bgcolor=#D6E8FF width=100%><td><pre>\n"
+source_footer = "\n</pre></table></center><br>"
+
+# Chapter header/inter/footer.
+#
+chapter_header = "<br><center><table width=75%><tr><td><h2>"
+chapter_inter  = "</h2><ul>"
+chapter_footer = "</ul></td></tr></table></center>"
+
+
+# source language keyword coloration/styling
+#
+keyword_prefix = '<font color="darkblue">'
+keyword_suffix = '</font>'
+
+section_synopsis_header = '<h2>Synopsys</h2><font color="cyan">'
+section_synopsis_footer = '</font>'
+
+# Translate a single line of source to HTML.  This will convert
+# a "<" into "&lt;", ">" into "&gt;", etc.
+#
+def html_quote( line ):
+    result = string.replace( line,   "&", "&amp;" )
+    result = string.replace( result, "<", "&lt;" )
+    result = string.replace( result, ">", "&gt;" )
+    return result
+
+
+# same as 'html_quote', but ignores left and right brackets
+#
+def html_quote0( line ):
+    return string.replace( line, "&", "&amp;" )
+
+
+def dump_html_code( lines, prefix = "" ):
+    # clean the last empty lines
+    #
+    l = len( self.lines )
+    while l > 0 and string.strip( self.lines[l - 1] ) == "":
+        l = l - 1
+
+    # The code footer should be directly appended to the last code
+    # line to avoid an additional blank line.
+    #
+    print prefix + code_header,
+    for line in self.lines[0 : l+1]:
+        print '\n' + prefix + html_quote(line),
+    print prefix + code_footer,
+
+
+
+class HtmlFormatter(Formatter):
+    
+    def __init__( self, processor, project_title, file_prefix ):
+        
+        Formatter.__init__( self, processor )
+        
+        global html_header_1, html_header_2, html_header_3, html_footer
+        
+        if file_prefix:
+            file_prefix = file_prefix + "-"
+        else:
+            file_prefix = ""
+
+        self.project_title = project_title
+        self.file_prefix   = file_prefix
+        self.html_header   = html_header_1 + project_title + html_header_2 + \
+                             project_title + html_header_3
+    
+        self.html_footer = "<p><center><font size=""-2"">generated on " +   \
+                            time.asctime( time.localtime( time.time() ) ) + \
+                           "</font></p></center>" + html_footer
+        
+        self.columns = 3
+    
+    def  make_section_url( self, section ):
+        return self.file_prefix + section.name + ".html"
+
+
+    def  make_block_url( self, block ):
+        return self.make_section_url( block.section ) + "#" + block.name
+
+
+    def  make_html_words( self, words ):
+        """ convert a series of simple words into some HTML text """
+        line = ""
+        if words:
+            line = html_quote( words[0] )
+            for w in words[1:]:
+                line = line + " " + html_quote( w )
+
+        return line
+
+
+    def  make_html_word( self, word ):
+        """analyze a simple word to detect cross-references and styling"""
+        # look for cross-references
+        #
+        m = re_crossref.match( word )
+        if m:
+            try:
+                name = m.group(1)
+                block = self.identifiers[ name ]
+                url   = self.make_block_url( block )
+                return '<a href="' + url + '">' + name + '</a>'
+            except:
+                return '?' + name + '?'
+
+        # look for italics and bolds
+        m = re_italic.match( word )
+        if m:
+            name = m.group(1)
+            return '<i>'+name+'</i>'
+    
+        m = re_bold.match( word )
+        if m:
+            name = m.group(1)
+            return '<b>'+name+'</b>'
+
+        return html_quote(word)
+
+
+    def  make_html_para( self, words ):
+        """ convert a paragraph's words into tagged HTML text, handle xrefs """
+        line = ""
+        if words:
+            line = self.make_html_word( words[0] )
+            for word in words[1:]:
+                line = line + " " + self.make_html_word( word )
+        
+        return "<p>" + line + "</p>"
+
+
+    def  make_html_code( self, lines ):
+        """ convert a code sequence to HTML """
+        line = code_header + '\n'
+        for l in lines:
+            line = line + html_quote( l ) + '\n'
+
+        return line + code_footer
+
+
+    def  make_html_items( self, items ):
+        """ convert a field's content into some valid HTML """
+        lines = []
+        for item in items:
+            if item.lines:
+                lines.append( self.make_html_code( item.lines ) )
+            else:
+                lines.append( self.make_html_para( item.words ) )
+
+        return string.join( lines, '\n' )
+
+
+    def  print_html_items( self, items ):
+        print self.make_html_items( items )
+
+
+    def print_html_field( self, field ):
+        if field.name:
+            print "<table valign=top><tr><td><b>"+field.name+"</b></td><td>"
+
+        print self.make_html_items( field.items )
+        
+        if field.name:
+            print "</td></tr></table>"
+
+
+    def html_source_quote( self, line, block_name = None ):
+        result = ""
+        while line:
+            m = re_source_crossref.match( line )
+            if m:
+                name   = m.group(2)
+                prefix = html_quote( m.group(1) )
+                length = len( m.group(0) )
+                
+                if name == block_name:
+                    # this is the current block name, if any
+                    result = result + prefix + '<b>' + name + '</b>'
+                
+                elif re_source_keywords.match(name):
+                    # this is a C keyword
+                    result = result + prefix + keyword_prefix + name + keyword_suffix
+                    
+                elif self.identifiers.has_key(name):
+                    # this is a known identifier
+                    block = self.identifiers[name]
+                    result = result + prefix + '<a href="' + \
+                             self.make_block_url(block) + '">' + name + '</a>'
+                else:
+                    result = result + html_quote(line[ : length ])
+
+                line = line[ length : ]
+            else:
+                result = result + html_quote(line)
+                line   = []
+        
+        return result
+
+
+    def print_html_field_list( self, fields ):
+        print "<table valign=top cellpadding=3>"
+        for field in fields:
+            print "<tr><td><b>" + field.name + "</b></td><td>"
+            self.print_html_items( field.items )
+            print "</td></tr>"
+        print "</table>"
+    
+    
+    def print_html_markup( self, markup ):
+        table_fields = []
+        for field in markup.fields:
+            if field.name:
+                # we begin a new series of field or value definitions, we
+                # will record them in the 'table_fields' list before outputting
+                # all of them as a single table
+                #
+                table_fields.append( field )
+                
+            else:
+                if table_fields:
+                    self.print_html_field_list( table_fields )
+                    table_fields = []
+    
+                self.print_html_items( field.items )
+        
+        if table_fields:
+            self.print_html_field_list( table_fields )
+
+    #
+    #  Formatting the index
+    #
+    
+    def  index_enter( self ):
+        print self.html_header
+        self.index_items = {}
+
+    def  index_name_enter( self, name ):
+        block = self.identifiers[ name ]
+        url   = self.make_block_url( block )
+        self.index_items[ name ] = url
+
+    def  index_exit( self ):
+        
+        # block_index already contains the sorted list of index names
+        count = len( self.block_index )
+        rows  = (count + self.columns - 1)/self.columns
+        
+        print "<center><table border=0 cellpadding=0 cellspacing=0>"
+        for r in range(rows):
+            line = "<tr>"
+            for c in range(self.columns):
+                i = r + c*rows
+                if i < count:
+                    bname = self.block_index[ r + c*rows ]
+                    url   = self.index_items[ bname ]
+                    line = line + '<td><a href="' + url + '">' + bname + '</a></td>'
+                else:
+                    line = line + '<td></td>'
+            line = line + "</tr>"
+            print line
+
+        print "</table></center>"
+        print self.html_footer
+        self.index_items = {}
+
+    def  index_dump( self, index_filename = None ):
+        
+        if index_filename == None:
+            index_filename = self.file_prefix + "index.html"
+
+        Formatter.index_dump( self, index_filename )
+
+    #
+    #  Formatting the table of content
+    #    
+    def  toc_enter( self ):
+        print self.html_header
+        print "<center><h1>Table of Contents</h1></center>"
+
+    def  toc_chapter_enter( self, chapter ):
+        print  chapter_header + string.join(chapter.title) + chapter_inter
+        print "<table cellpadding=5>"
+
+    def  toc_section_enter( self, section ):
+        print "<tr valign=top><td>"
+        print '<a href="' + self.make_section_url( section ) + '">' + \
+               section.title + '</a></td><td>'
+        
+        print self.make_html_para( section.abstract )
+
+    def  toc_section_exit( self, section ):
+        print "</td></tr>"
+
+    def  toc_chapter_exit( self, chapter ):
+        print "</table>"
+        print  chapter_footer
+
+    def  toc_index( self, index_filename ):
+        print chapter_header + '<a href="' + index_filename + '">Global Index</a>' + chapter_inter + chapter_footer
+
+    def  toc_exit( self ):
+        print "</table></center>"
+        print self.html_footer
+
+    def  toc_dump( self, toc_filename = None, index_filename = None ):
+        if toc_filename == None:
+            toc_filename = self.file_prefix + "toc.html"
+        
+        if index_filename == None:
+            index_filename = self.file_prefix + "index.html"
+
+        Formatter.toc_dump( self, toc_filename, index_filename )
+
+    #
+    #  Formatting sections
+    #
+    def  section_enter( self, section ):
+        print self.html_header
+
+        print section_title_header
+        print section.title
+        print section_title_footer
+
+        # print section synopsys
+        print section_synopsis_header
+        print "<center><table cellspacing=5 cellpadding=0 border=0>"
+        
+        maxwidth = 0
+        for b in section.blocks.values():
+            if len(b.name) > maxwidth:
+                maxwidth = len(b.name)
+
+        width  = 130  # XXX magic number
+        columns = width / maxwidth
+        if columns < 1:
+            columns = 1
+
+        count   = len(section.block_names)
+        rows    = (count + columns-1)/columns
+        for r in range(rows):
+            line = "<tr>"
+            for c in range(columns):
+                i = r + c*rows
+                line = line + '<td></td><td>'
+                if i < count:
+                    name = section.block_names[i]
+                    line = line + '<a href="#' + name + '">' + name + '</a>'
+
+                line = line + '</td>'
+            line = line + "</tr>"
+            print line
+            
+        print "</table></center><br><br>"
+        print section_synopsis_footer
+
+        print description_header
+        print self.make_html_items( section.description )
+        print description_footer
+
+    def  block_enter( self, block ):
+        print block_header
+
+        # place html anchor if needed
+        if block.name:
+            print '<a name="' + block.name + '">'
+            print "<h4>" + block.name + "</h4>"
+            print "</a>"
+        
+        # dump the block C source lines now
+        if block.code:
+            print source_header
+            for l in block.code:
+                print self.html_source_quote( l, block.name )
+            print source_footer
+
+
+    def  markup_enter( self, markup, block ):
+        if markup.tag == "description":
+            print description_header
+        else:
+            print marker_header + markup.tag + marker_inter
+        
+        self.print_html_markup( markup )
+    
+    def  markup_exit( self, markup, block ):
+        if markup.tag == "description":
+            print description_footer
+        else:
+            print marker_footer
+
+    def  block_exit( self, block ):
+        print block_footer
+
+        
+    def  section_exit( self, section ):
+        print html_footer
+
+
+    def section_dump_all( self ):
+        for section in self.sections:
+            self.section_dump( section, self.file_prefix + section.name + '.html' )
+        
--- a/src/tools/docmaker/utils.py
+++ b/src/tools/docmaker/utils.py
@ -0,0 +1,86 @@
+import os
+import string, sys
+
+# This function is used to sort the index.  It is a simple lexicographical
+# sort, except that it places capital letters before lowercase ones.
+#
+def index_sort( s1, s2 ):
+    if not s1:
+        return -1
+
+    if not s2:
+        return 1
+
+    l1 = len( s1 )
+    l2 = len( s2 )
+    m1 = string.lower( s1 )
+    m2 = string.lower( s2 )
+
+    for i in range( l1 ):
+        if i >= l2 or m1[i] > m2[i]:
+            return 1
+
+        if m1[i] < m2[i]:
+            return -1
+
+        if s1[i] < s2[i]:
+            return -1
+
+        if s1[i] > s2[i]:
+            return 1
+
+    if l2 > l1:
+        return -1
+
+    return 0
+
+# Sort input_list, placing the elements of order_list in front.
+#
+def sort_order_list( input_list, order_list ):
+    new_list = order_list[:]
+    for id in input_list:
+        if not id in order_list:
+            new_list.append( id )
+    return new_list
+
+
+# current output directory
+#
+output_dir = None
+
+
+# Open the standard output to a given project documentation file.  Use
+# "output_dir" to determine the filename location if necessary and save the
+# old stdout in a tuple that is returned by this function.
+#
+def open_output( filename ):
+    global output_dir
+
+    if output_dir and output_dir != "":
+        filename = output_dir + os.sep + filename
+
+    old_stdout = sys.stdout
+    new_file   = open( filename, "w" )
+    sys.stdout = new_file
+
+    return ( new_file, old_stdout )
+
+
+# Close the output that was returned by "open_output".
+#
+def close_output( output ):
+    output[0].close()
+    sys.stdout = output[1]
+
+
+# Check output directory.
+#
+def check_output( ):
+    global output_dir
+    if output_dir:
+        if output_dir != "":
+            if not os.path.isdir( output_dir ):
+                sys.stderr.write( "argument" + " '" + output_dir + "' " +
+                                  "is not a valid directory" )
+                sys.exit( 2 )
+        else:
+            output_dir = None