* src/tools/docmaker/*: adding new (more advanced) version of

the DocMaker tool. Python with regular expressions rocks..
2002-07-30 18:49:52 +00:00 · 2002-07-30 18:49:52 +00:00 · f52165cc06
parent 76effc302f
commit f52165cc06
7 changed files with 1780 additions and 0 deletions
--- a/3
+++ b/3
@ -4,6 +4,9 @@
        to demonstrate a "cleaner" API to support incremental font loading.
        comments appreciated...
        * src/tools/docmaker/*: adding new (more advanced) version of
        the DocMaker tool. Python with regular expressions rocks..
 2002-07-28  Werner Lemberg  <wl@gnu.org>
 	s/ft_memset/FT_MEM_SET/.
--- a/src/tools/docmaker/content.py
+++ b/src/tools/docmaker/content.py
@ -0,0 +1,547 @@
 #
 #  this file contains routines used to parse the content of documentation
 #  comment block and build a more structured objects out of them
 #
 from sources import *
 from utils import *
 import string, re
 # this regular expression is used to detect code sequences. these
 # are simply code fragments embedded in '{' and '}' like in:
 #
 #  {
 #    x = y + z;
 #    if ( zookoo == 2 )
 #    {
 #      foobar();
 #    }
 #  }
 #
 # note that each brace must stand alone on its line. a code sequence
 # ends at the first closing brace that is not indented more than the
 # opening one; braces at a greater indentation belong to the code
 #
 # group 1 captures the leading whitespace, whose length gives the
 # indentation of the brace
 #
 re_code_start = re.compile( r"(\s*){\s*$" )
 re_code_end   = re.compile( r"(\s*)}\s*$" )
 # this regular expression is used to isolate identifiers from
 # other text: group 1 is the (possibly empty) leading run of
 # alphanumeric/underscore characters
 #
 re_identifier = re.compile( r'(\w*)' )
 #############################################################################
 #
 # The DocCode class is used to store source code lines.
 #
 #   'self.lines' contains a set of source code lines that will be dumped as
 #   HTML in a <PRE> tag.
 #
 #   The object is filled line by line by the parser; it strips the leading
 #   "margin" space from each input line before storing it in 'self.lines'.
 #
 class DocCode:
    """Hold the lines of one verbatim code sequence.

    'self.lines' is the list of code lines with the left margin removed;
    'self.words' is always None, which distinguishes code from paragraphs.
    """

    def __init__( self, margin, lines ):
        """Store 'lines', dropping the first 'margin' columns of each line.

        The margin is only removed when it consists of blanks, so a line
        that starts left of the margin is kept whole instead of truncated.
        """
        self.lines = []
        self.words = None
        for l in lines:
            # str methods replace the deprecated 'string' module functions
            if l[:margin].strip() == "":
                l = l[margin:]
            self.lines.append( l )

    def dump( self, prefix = "", width = 60 ):
        """Print every code line preceded by 'prefix' ('width' is unused)."""
        for l in self.lines:
            # single-argument form behaves the same on Python 2 and 3
            print( prefix + l )
 #############################################################################
 #
 # The DocPara class is used to store "normal" text paragraph.
 #
 #   'self.words' contains the list of words that make up the paragraph
 #
 class DocPara:
    """Hold one normal text paragraph as a flat list of words.

    'self.words' is the list of whitespace-separated words of the
    paragraph; 'self.lines' is always None.
    """

    def __init__( self, lines ):
        """Split every line of 'lines' into words and collect them."""
        self.lines = None
        self.words = []
        for l in lines:
            # str.split() with no argument already ignores surrounding blanks
            self.words.extend( l.split() )

    def dump( self, prefix = "", width = 60 ):
        """Print the paragraph re-filled to 'width' columns.

        Each output line is preceded by 'prefix', which is not counted in
        the width.  A word longer than 'width' gets a line of its own.
        """
        cur = ""  # text of the line being built
        col = 0   # its current width
        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1  # room for the separating space
            # only flush a non-empty line: an over-long first word must
            # not produce an empty output line
            if col > 0 and col + ln > width:
                print( prefix + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln
        if col > 0:
            print( prefix + cur )
 #############################################################################
 #
 #  The DocField class is used to store a list containing either DocPara or
 #  DocCode objects. Each DocField also has an optional "name" which is used
 #  when the object corresponds to a field of value definition
 #
 class DocField:
    """Hold the items (DocPara or DocCode objects) of one field.

    'self.name' is the field name, or None for unnamed text;
    'self.items' is the ordered list of paragraphs and code sequences
    found in the lines given to the constructor.
    """

    def __init__( self, name, lines ):
        """Split 'lines' into paragraphs and code sequences.

        A line holding only '{' starts a code sequence, which runs up to
        a line holding only '}' that is not indented more than the
        opening brace.  Outside code, blank lines separate paragraphs.
        """
        self.name  = name  # can be None for normal paragraphs/sources
        self.items = []    # list of items
        in_code    = 0     # true while inside a code sequence
        margin     = -1    # indentation of the current code sequence
        cur_lines  = []
        for l in lines:
            if in_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence
                    self.items.append( DocCode( margin, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = 0
                else:
                    # keep the line untouched: DocCode removes the margin
                    # itself, stripping it here too would remove it twice
                    cur_lines.append( l )
                continue
            m = re_code_start.match( l )
            if m:
                # a code sequence ends the paragraph in progress, if any
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []
                margin  = len( m.group( 1 ) )
                in_code = 1
            elif l.split():
                # non-blank line: it belongs to the current paragraph
                cur_lines.append( l )
            elif cur_lines:
                # blank line: it ends the current paragraph
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            # a blank line outside any paragraph is simply skipped, so
            # that no empty paragraph is ever created
        if in_code:
            # unexpected end of code sequence
            self.items.append( DocCode( margin, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """Print the field for debugging, one blank line between items."""
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"
        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0
 # this regular expression is used to detect field definitions, i.e.
 # lines starting with "name ::"; group 1 is the field name
 #
 re_field  = re.compile( r"\s*(\w*)\s*::" )
 class DocMarkup:
    """Hold the content of one markup section as a list of DocField.

    'self.tag' is the lower-cased markup tag; 'self.fields' is the
    ordered list of fields found in the lines given to the constructor.
    """

    def __init__( self, tag, lines ):
        """Split 'lines' into fields, starting a new one at each 'name ::'."""
        self.tag    = tag.lower()
        self.fields = []
        cur_lines   = []
        field       = None
        for l in lines:
            m = re_field.match( l )
            if m:
                # we detected the start of a new field definition;
                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )
                field = m.group( 1 )  # record field name
                # blank out the "name ::" prefix so that the remaining
                # text keeps its original column
                ln        = len( m.group( 0 ) )
                cur_lines = [ " " * ln + l[ln:] ]
            else:
                cur_lines.append( l )
        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """Return the first word of the first item, or None if there is none."""
        try:
            return self.fields[0].items[0].words[0]
        except ( IndexError, TypeError, AttributeError ):
            # no field, no item, or a code item (whose 'words' is None)
            return None

    def dump( self, margin ):
        """Print the markup for debugging, its tag lines indented by 'margin'."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( "  " )
        print( " " * margin + "</" + self.tag + ">" )
 class DocChapter:
    """Describe a chapter, i.e. an ordered group of sections.

    'block' is the documentation block defining the chapter, or None for
    the catch-all chapter named "Other".  'self.sections' starts empty.
    """

    def __init__( self, block ):
        """Read name, title words and section order from 'block', if any."""
        self.block    = block
        self.sections = []
        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            # the title is a list of words, like get_markup_words() returns
            self.title = "Miscellaneous".split()
            self.order = []
 class DocSection:
    """Collect the blocks and the description of one documentation section."""

    def __init__( self, name = "Other" ):
        """Create an empty section; its title stays "ERROR" until process()."""
        self.name        = name
        self.title       = "ERROR"
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.chapter     = None
        self.defs        = []
        self.blocks      = {}
        self.block_names = []  # block names, in order of registration

    def add_def( self, block ):
        """Register a block that may hold the section's description."""
        self.defs.append( block )

    def add_block( self, block ):
        """Register a block of this section under its name."""
        key = block.name
        self.block_names.append( key )
        self.blocks[ key ] = block

    def process( self ):
        """Take title, abstract, description and order from the first
           registered definition block that has a non-empty title"""
        for candidate in self.defs:
            found = candidate.get_markup_text( "Title" )
            if not found:
                continue
            self.title       = found
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words( "order" )
            break

    def reorder( self ):
        """Re-sort the block names with sort_order_list and 'self.order'."""
        self.block_names = sort_order_list( self.block_names, self.order )
 class ContentProcessor:
    """Turn documentation comment blocks into sections and chapters.

    'self.sections' maps section names to DocSection objects,
    'self.section' is the section currently being filled and
    'self.chapters' is the list of DocChapter objects.  The 'markup*'
    attributes hold the parsing state of the block being processed.
    """

    def __init__( self ):
        """initialize a block content processor"""
        self.reset()
        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section
        self.chapters = []    # list of chapters

    def set_section( self, section_name ):
        """set current section during parsing, creating it when needed"""
        if section_name not in self.sections:
            self.sections[ section_name ] = DocSection( section_name )
        self.section = self.sections[ section_name ]

    def add_chapter( self, block ):
        """record a new chapter defined by 'block'"""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """reset the content processor for a new block"""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """add a new markup section"""
        if self.markup and self.markup_lines:
            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                marks = marks[:-1]
            self.markups.append( DocMarkup( self.markup, marks ) )
            self.markup       = None
            self.markup_lines = []

    def process_content( self, content ):
        """process a block content and return a list of DocMarkup objects
           corresponding to it"""
        seen_tag = 0  # lines before the first markup tag are ignored
        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # blank out the tag, keeping the text at its column
                    line   = " " * prefix + line[prefix:]
                    break
            if found:
                # start of a new markup section: flush the previous one
                seen_tag = 1
                self.add_markup()
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_tag:
                self.markup_lines.append( line )
        self.add_markup()
        return self.markups

    def parse_sources( self, source_processor ):
        """build a DocBlock for each documentation block of the processor"""
        blocks = source_processor.blocks
        count  = len( blocks )
        for n in range( count ):
            source = blocks[n]
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1
                # the new block registers itself with this processor
                DocBlock( source, follow, self )

    def finish( self ):
        """process all sections and attach each of them to a chapter"""
        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()
        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[ sec ]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +
                        chap.name + "' in " + chap.block.location() + \
                        " lists unknown section '" + sec + "'\n" )
        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                others.append( sec )
        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
 class DocBlock:
    """Hold one documentation block and the source code that follows it.

    'self.markups' is the list of DocMarkup objects of the block,
    'self.type' the tag of the first one and 'self.name' the first
    identifier of its text ("ERRTYPE"/"ERRNAME" when they cannot be
    determined).  'self.code' is the list of source lines documented.
    """

    def __init__( self, source, follow, processor ):
        """Parse 'source' and register the new block with 'processor'.

        'source' is the documentation source block, 'follow' the list of
        normal source blocks found after it.
        """
        processor.reset()
        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )
        # compute block type from first markup tag
        if self.markups:
            self.type = self.markups[0].tag
        # compute block name from first markup paragraph
        try:
            name = self.markups[0].fields[0].items[0].words[0]
        except ( IndexError, TypeError, AttributeError ):
            # no markup, field or item, or a code item without words
            pass
        else:
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            if processor.section is None:
                # block found before any section definition: file it in
                # the default section rather than failing on None
                processor.set_section( "Other" )
                self.section = processor.section
            processor.section.add_block( self )
        # now, compute the source lines relevant to this documentation
        # block. We keep normal comments in for obvious reasons (??)
        source = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # we use "/* */" as a separator
                if re_source_sep.match( l ):
                    break
                source.append( l )
        self.code = self._strip_blank_lines( source )

    def _strip_blank_lines( self, lines ):
        """Return 'lines' without its leading and trailing blank lines."""
        start = 0
        end   = len( lines )
        while start < end and not lines[start].strip():
            start = start + 1
        while end > start and not lines[end - 1].strip():
            end = end - 1
        return lines[start:end]

    def location( self ):
        """return the location of the block, as given by its source block"""
        return self.source.location()

    def get_markup( self, tag_name ):
        """return the DocMarkup corresponding to a given tag in a block"""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_name( self, tag_name ):
        """return the name of a given primary markup in a block"""
        m = self.get_markup( tag_name )
        if m is None:
            return None
        return m.get_name()

    def get_markup_words( self, tag_name ):
        """return the words of the first item of a markup, [] if missing"""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError ):
            return []

    def get_markup_text( self, tag_name ):
        """return the words of a markup joined by single spaces"""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """return the items of the first field of a markup, None if missing"""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError ):
            return None
--- a/src/tools/docmaker/docmaker.py
+++ b/src/tools/docmaker/docmaker.py
@ -0,0 +1,120 @@
 #!/usr/bin/env python
 #
 #  DocMaker 0.2 (c) 2002 David Turner <david@freetype.org>
 #
 # This program is a re-write of the original DocMaker tool used
 # to generate the API Reference of the FreeType font engine
 # by converting in-source comments into structured HTML
 #
 # This new version is capable of outputting XML data, as well
 # as accepts more liberal formatting options
 #
 # It also uses regular expression matching and substitution
 # to speed things significantly
 #
 from sources import *
 from content import *
 from tohtml  import *
 import sys, os, time, string, glob, getopt
 def file_exists( pathname ):
    """checks that a given file exists

    Return 1 when 'pathname' can be opened for reading.  Otherwise write
    a message on standard error and return None.
    """
    try:
        handle = open( pathname, "r" )
    except IOError:
        # the stream is 'sys.stderr'; there is no 'sys.err'
        sys.stderr.write( pathname + " couldn't be accessed\n" )
        return None
    handle.close()
    return 1
 def make_file_list( args = None ):
    """builds a list of input files from command-line arguments

    'args' defaults to sys.argv[1:].  Arguments containing '*' are
    expanded with glob and their matches sorted; other arguments are
    kept as they are.  Return the list of files that can be opened, or
    None when the arguments produce no file name at all.
    """
    if not args:
        args = sys.argv[1 :]
    file_list = []
    for pathname in args:
        if '*' in pathname:
            newpath = glob.glob( pathname )
            newpath.sort()  # sort files -- this is important because
                            # of the order of files
        else:
            newpath = [pathname]
        file_list.extend( newpath )
    if not file_list:
        return None
    # now filter the file list to remove non-existing ones; a list
    # comprehension returns a real list on every Python version
    return [ name for name in file_list if file_exists( name ) ]
 def usage():
    """print the command-line help text on standard output"""
    help_lines = ( "\nDocMaker 0.2 Usage information\n",
                   "  docmaker [options] file1 [ file2 ... ]\n",
                   "using the following options:\n",
                   "  -h : print this page" )
    for text in help_lines:
        print( text )
 def main( argv ):
    """main program loop

    'argv' is the full argument vector, program name included.  Exit
    with status 2 on an unknown option, 0 after printing the help for
    -h/--help, and 1 when no input file is given.
    """
    try:
        opts, args = getopt.getopt( argv[1:], "h", [ "help" ] )
    except getopt.GetoptError:
        usage()
        sys.exit( 2 )
    # process options first, so that "-h" alone is not reported as a
    # missing-file error
    #
    for opt in opts:
        if opt[0] in ( "-h", "--help" ):
            usage()
            sys.exit( 0 )
    if not args:
        usage()
        sys.exit( 1 )
    # create context and processor
    source_processor  = SourceProcessor()
    content_processor = ContentProcessor()
    # retrieve the list of files to process from the parsed arguments
    # (not from sys.argv, which still holds the options); the list is
    # None when a wildcard matched nothing
    file_list = make_file_list( args ) or []
    for filename in file_list:
        source_processor.parse_file( filename )
        content_processor.parse_sources( source_processor )
    # process sections
    content_processor.finish()
    formatter = HtmlFormatter( content_processor, "Example", "zz" )
    formatter.toc_dump()
    formatter.index_dump()
    formatter.section_dump_all()
 # if called from the command line (and not imported as a module), run
 # the program with the full argument vector, program name included
 #
 if __name__ == '__main__':
    main( sys.argv )
 # eof
--- a/src/tools/docmaker/formatter.py
+++ b/src/tools/docmaker/formatter.py
@ -0,0 +1,194 @@
 from sources import *
 from content import *
 from utils   import *
 class Formatter:
    """Base class of output formatters.

    The '*_dump' methods walk the chapters, sections, blocks, markups
    and fields of a content processor and call the matching '*_enter'
    and '*_exit' hooks.  Every hook does nothing here; subclasses
    override the ones they need.
    """

    def __init__( self, processor ):
        """Index the blocks of 'processor' by identifier.

        'self.identifiers' maps each block name, and each named field
        of a 'values' markup, to its block.  'self.block_index' is the
        list of these identifiers sorted with index_sort.
        """
        self.processor   = processor
        self.identifiers = {}
        self.chapters    = processor.chapters
        self.sections    = processor.sections.values()
        self.blocks      = []
        for section in self.sections:
            for block in section.blocks.values():
                self.add_identifier( block.name, block )
                # add enumeration values to the index, since this is useful
                for markup in block.markups:
                    if markup.tag == 'values':
                        for field in markup.fields:
                            # unnamed fields (plain text) have no
                            # identifier to index
                            if field.name:
                                self.add_identifier( field.name, block )
        self.block_index = list( self.identifiers.keys() )
        self.block_index.sort( index_sort )

    def add_identifier( self, name, block ):
        """Map 'name' to 'block'; warn on stderr and keep the first
           definition when the name is already known"""
        if name in self.identifiers:
            # duplicate name !!
            sys.stderr.write( \
               "WARNING: duplicate definition for '" + name + "' in " + \
               block.location() + ", previous definition in " +         \
               self.identifiers[ name ].location() + "\n" )
        else:
            self.identifiers[name] = block

    #
    #  Formatting the table of contents
    #
    def toc_enter( self ):
        pass

    def toc_chapter_enter( self, chapter ):
        pass

    def toc_section_enter( self, section ):
        pass

    def toc_section_exit( self, section ):
        pass

    def toc_chapter_exit( self, chapter ):
        pass

    def toc_index( self, index_filename ):
        pass

    def toc_exit( self ):
        pass

    def toc_dump( self, toc_filename = None, index_filename = None ):
        """Emit the table of contents, to 'toc_filename' when given."""
        output = None
        if toc_filename:
            output = open_output( toc_filename )
        try:
            self.toc_enter()
            for chap in self.processor.chapters:
                self.toc_chapter_enter( chap )
                for section in chap.sections:
                    self.toc_section_enter( section )
                    self.toc_section_exit( section )
                self.toc_chapter_exit( chap )
            self.toc_index( index_filename )
            self.toc_exit()
        finally:
            # give the output back even when a hook fails
            if output:
                close_output( output )

    #
    #  Formatting the index
    #
    def index_enter( self ):
        pass

    def index_name_enter( self, name ):
        pass

    def index_name_exit( self, name ):
        pass

    def index_exit( self ):
        pass

    def index_dump( self, index_filename = None ):
        """Emit the identifier index, to 'index_filename' when given."""
        output = None
        if index_filename:
            output = open_output( index_filename )
        try:
            self.index_enter()
            for name in self.block_index:
                self.index_name_enter( name )
                self.index_name_exit( name )
            self.index_exit()
        finally:
            if output:
                close_output( output )

    #
    #  Formatting a section
    #
    def section_enter( self, section ):
        pass

    def block_enter( self, block ):
        pass

    def markup_enter( self, markup, block = None ):
        pass

    def field_enter( self, field, markup = None, block = None ):
        pass

    def field_exit( self, field, markup = None, block = None ):
        pass

    def markup_exit( self, markup, block = None ):
        pass

    def block_exit( self, block ):
        pass

    def section_exit( self, section ):
        pass

    def section_dump( self, section, section_filename = None ):
        """Emit one section, to 'section_filename' when given."""
        output = None
        if section_filename:
            output = open_output( section_filename )
        try:
            self.section_enter( section )
            for name in section.block_names:
                block = self.identifiers[ name ]
                self.block_enter( block )
                for markup in block.markups[1:]:   # always ignore first markup !!
                    self.markup_enter( markup, block )
                    for field in markup.fields:
                        self.field_enter( field, markup, block )
                        self.field_exit( field, markup, block )
                    self.markup_exit( markup, block )
                self.block_exit( block )
            self.section_exit( section )
        finally:
            if output:
                close_output( output )

    def section_dump_all( self ):
        """Emit every section in turn."""
        for section in self.sections:
            self.section_dump( section )
    #
    #  Formatting a block
    #
--- a/src/tools/docmaker/sources.py
+++ b/src/tools/docmaker/sources.py
@ -0,0 +1,355 @@
 #
 # this file contains definitions of classes needed to decompose
 # C sources files into a series of multi-line "blocks". There are
 # two kinds of blocks:
 #
 #   - normal blocks, which contain source code or ordinary comments
 #
 #   - documentation blocks, which have restricted formatting, and
 #     whose text always start with a documentation markup tag like
 #     "<Function>", "<Type>", etc..
 #
 # the routines used to process the content of documentation blocks
 # are not contained here, but in "content.py"
 #
 # the classes and methods found here only deal with text parsing
 # and basic documentation block extraction
 #
 import fileinput, re, sys, os, string
 ################################################################
 ##
 ##  BLOCK FORMAT PATTERN
 ##
 ##   A simple class containing compiled regular expressions used
 ##   to detect potential documentation format block comments within
 ##   C source code
 ##
 ##   note that the 'column' pattern must contain a group that will
 ##   be used to "unbox" the content of documentation comment blocks
 ##
 class SourceBlockFormat:
    """Group the three compiled patterns describing one comment format."""

    def __init__( self, id, start, column, end ):
        """compile the 'start', 'column' and 'end' patterns, all written
           in verbose syntax, and keep them with the format 'id'"""
        self.id = id
        compiled = [ re.compile( pattern, re.VERBOSE )
                     for pattern in ( start, column, end ) ]
        self.start, self.column, self.end = compiled
 #
 # format 1 documentation comment blocks look like the following:
 #
 #    /************************************/
 #    /*                                  */
 #    /*                                  */
 #    /*                                  */
 #    /************************************/
 #
 # we define a few regular expressions here to detect them
 #
 # the patterns are compiled with re.VERBOSE by SourceBlockFormat, so
 # the whitespace and '#' comments inside them are not significant
 #
 start = r'''
  \s*       # any number of whitespace
  /\*{2,}/  # followed by '/' and at least two asterisks then '/'
  \s*$      # eventually followed by whitespace
 '''
 column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # enventually followed by whitespace
 '''
 # the closing line of the box looks like the opening one, hence
 # 'start' is passed as the 'end' pattern too
 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
 #
 # format 2 documentation comment blocks look like the following:
 #
 #    /************************************
 #     *
 #     *
 #     *
 #     *
 #     **/       (1 or more asterisks at the end)
 #
 # we define a few regular expressions here to detect them
 #
 # note that 'start' and 'column' are re-used names: the format 1
 # object above has already compiled its own patterns
 #
 start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # eventually followed by whitespace
 '''
 column = r'''
  \s*         # any number of whitespace
  \*{1}       # followed by precisely one asterisk
  (.*)        # followed by anything (group1)
 '''
 end = r'''
  \s*     # any number of whitespace
  \*+/    # followed by at least on asterisk, then '/'
 '''
 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
 #
 # the list of supported documentation block formats, we could add new ones
 # relatively easily
 #
 re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
 #
 # the following regular expressions correspond to markup tags
 # within the documentation comment blocks. they're equivalent
 # despite their different syntax
 #
 # notice how each markup tag _must_ begin a new line; in both
 # patterns, group 1 is the tag name
 #
 re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
 re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format
 #
 # the list of supported markup tags, we could add new ones relatively
 # easily
 #
 re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
 #
 # used to detect a cross-reference, after markup tags have been stripped;
 # group 1 is the referenced name
 #
 re_crossref = re.compile( r'@(\w*)' )
 #
 # used to detect italic and bold styles in paragraph text, written
 # as _word_ and *word* respectively; group 1 is the styled word
 #
 re_italic = re.compile( r'_(\w+)_' )
 re_bold   = re.compile( r'\*(\w+)\*' )
 #
 # used to detect the end of commented source lines, i.e. an empty
 # comment like "/* */"
 #
 re_source_sep = re.compile( r'\s*/\*\s*\*/' )
 #
 # used to perform cross-reference within source output; group 1 is a
 # run of non-word characters and group 2 the word that follows it
 #
 re_source_crossref = re.compile( r'(\W*)(\w*)' )
 #
 # a list of reserved source keywords
 #
 # the pattern is a raw string, so that '\#' reaches the regular
 # expression engine as an escaped '#' instead of being an invalid string
 # escape. alternatives are tried from left to right, hence '#ifdef' and
 # '#ifndef' must come before their prefix '#if' to be matched in full
 #
 re_source_keywords = re.compile( r'''( typedef |
                                        struct |
                                        enum   |
                                        union  |
                                        const  |
                                        char   |
                                        int    |
                                        short  |
                                        long   |
                                        void   |
                                        signed |
                                        unsigned |
                                        \#include |
                                        \#define  |
                                        \#undef   |
                                        \#ifdef   |
                                        \#ifndef  |
                                        \#if      |
                                        \#else    |
                                        \#endif   )''', re.VERBOSE )
 ################################################################
 ##
 ##  SOURCE BLOCK CLASS
 ##
 ##   A SourceProcessor is in charge of reading a C source file
 ##   and decomposing it into a series of different "SourceBlocks".
 ##   each one of these blocks can be made of the following data:
 ##
 ##   - A documentation comment block that starts with "/**" and
 ##     whose exact format will be discussed later
 ##
 ##   - normal sources lines, include comments
 ##
 ##   the important fields in a text block are the following ones:
 ##
 ##     self.lines   : a list of text lines for the corresponding block
 ##
 ##     self.content : for documentation comment blocks only, this is the
 ##                    block content that has been "unboxed" from its
 ##                    decoration. This is None for all other blocks
 ##                    (i.e. sources or ordinary comments with no starting
 ##                     markup tag)
 ##
 class SourceBlock:
    def __init__( self, processor, filename, lineno, lines ):
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines
        self.format    = processor.format
        self.content   = []
        if self.format == None:
            return
        words = []
        # extract comment lines
        lines = []
        for line0 in self.lines[1:]:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group(1) )
        # now, look for a markup tag
        for l in lines:
            l = string.strip(l)
            if len(l) > 0:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        self.content = lines
                return
    def location( self ):
        return "(" + self.filename + ":" + repr(self.lineno) + ")"
    # debugging only - not used in normal operations
    def dump( self ):
        if self.content:
            print "{{{content start---"
            for l in self.content:
                print l
            print "---content end}}}"
            return
        fmt = ""
        if self.format:
            fmt = repr(self.format.id) + " "
        for line in self.lines:
            print line
 ################################################################
 ##
 ##  SOURCE PROCESSOR CLASS
 ##
 ##   The SourceProcessor is in charge of reading a C source file
 ##   and decomposing it into a series of different "SourceBlock"
 ##   objects.
 ##
 ##   each one of these blocks can be made of the following data:
 ##
 ##   - A documentation comment block that starts with "/**" and
 ##     whose exact format will be discussed later
 ##
 ##   - normal sources lines, include comments
 ##
 ##
 class SourceProcessor:
    """Split C source files into a list of SourceBlock objects."""

    def __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        self.lines    = []

    def reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """parse a C source file, and adds its blocks to the processor's list"""
        self.reset()
        self.filename = filename
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []
        for line in fileinput.input( filename ):
            # drop the trailing newline, important on Windows machines !!
            if line[-1] == '\012':
                line = line[:-1]
            fmt = self.format
            if fmt is None:
                # outside of any formatted comment block
                self.process_normal_line( line )
                continue
            if fmt.end.match( line ):
                # regular end of block: the line is part of it
                self.lines.append( line )
                self.add_block_lines()
            elif fmt.column.match( line ):
                # regular column line of the block
                self.lines.append( line )
            else:
                # the block stops without its end line: close it, then
                # handle the line as a normal one
                self.add_block_lines()
                self.process_normal_line( line )
        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """process a normal line and check if it's the start of a new block"""
        for candidate in re_source_block_formats:
            if candidate.start.match( line ):
                self.add_block_lines()
                self.format = candidate
                self.lineno = fileinput.filelineno()
        self.lines.append( line )

    def add_block_lines( self ):
        """add the current accumulated lines, and create a new block"""
        if not self.lines:
            return
        self.blocks.append( SourceBlock( self, self.filename,
                                         self.lineno, self.lines ) )
        self.format = None
        self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for block in self.blocks:
            block.dump()
 # eof
--- a/src/tools/docmaker/tohtml.py
+++ b/src/tools/docmaker/tohtml.py
@ -0,0 +1,475 @@
 from sources import *
 from content import *
 from formatter import *
 import time
 # The following defines the HTML header used by all generated pages.  It is
 # split in three fragments; HtmlFormatter.__init__ inserts the project title
 # between fragments 1 and 2 (page title) and between fragments 2 and 3
 # (visible heading).
 #
 # Document metadata must live in a <head> element (not <header>), the style
 # sheet language is given by the 'type' attribute, and CSS declarations are
 # written 'property: value'.
 #
 html_header_1 = """\
 <html>
 <head>
 <title>"""
 html_header_2= """ API Reference</title>
 <basefont face="Verdana,Geneva,Arial,Helvetica">
 <style type="text/css">
  P { text-align: justify }
  H1 { text-align: center }
  LI { text-align: justify }
 </style>
 </head>
 <body text=#000000
       bgcolor=#FFFFFF
       link=#0000EF
       vlink=#51188E
       alink=#FF0000>
 <center><h1>"""
 html_header_3=""" API Reference</h1></center>
 """
 # The HTML footer used by all generated pages; it closes the elements
 # opened by the page header.
 #
 html_footer = """\
 </body>
 </html>"""
 # The header and footer used for each section title.
 #
 section_title_header = "<center><h1>"
 section_title_footer = "</h1></center>"
 # The header and footer used for code segments.
 #
 code_header = "<font color=blue><pre>"
 code_footer = "</pre></font>"
 # Paragraph header and footer.
 #
 para_header = "<p>"
 para_footer = "</p>"
 # Block header and footer; each block is laid out in a centered table and
 # followed by a horizontal rule.
 #
 block_header = "<center><table width=75%><tr><td>"
 block_footer = "</td></tr></table><hr width=75%></center>"
 # Description header/footer.
 #
 description_header = "<center><table width=87%><tr><td>"
 description_footer = "</td></tr></table></center><br>"
 # Marker header/inter/footer combination: the marker name goes between
 # 'marker_header' and 'marker_inter', its content between 'marker_inter'
 # and 'marker_footer'.
 #
 marker_header = "<center><table width=87% cellpadding=5><tr bgcolor=#EEEEFF><td><em><b>"
 marker_inter  = "</b></em></td></tr><tr><td>"
 marker_footer = "</td></tr></table></center>"
 # Source code extracts header/footer.
 #
 source_header = "<center><table width=87%><tr bgcolor=#D6E8FF width=100%><td><pre>\n"
 source_footer = "\n</pre></table></center><br>"
 # Chapter header/inter/footer: the chapter title goes between
 # 'chapter_header' and 'chapter_inter', its entries between 'chapter_inter'
 # and 'chapter_footer'.
 #
 chapter_header = "<br><center><table width=75%><tr><td><h2>"
 chapter_inter  = "</h2><ul>"
 chapter_footer = "</ul></td></tr></table></center>"
 # Source language keyword coloration/styling.
 #
 keyword_prefix = '<font color="darkblue">'
 keyword_suffix = '</font>'
 # Header and footer wrapped around the synopsis of a section.
 #
 section_synopsis_header = '<h2>Synopsis</h2><font color="cyan">'
 section_synopsis_footer = '</font>'
 # Translate a single line of source to HTML.  This will convert
 # a "<" into "&lt;", ">" into "&gt;", etc.
 #
 def html_quote( line ):
    """Return 'line' with the characters '&', '<' and '>' replaced by
    their HTML entities.

    The ampersand is replaced first so that the '&' of the entities
    produced for '<' and '>' is not quoted a second time.
    """
    # string methods are used instead of the deprecated 'string.replace';
    # this also removes the dependency on a 'string' module import
    result = line.replace( "&", "&amp;" )
    result = result.replace( "<", "&lt;" )
    result = result.replace( ">", "&gt;" )
    return result
 # same as 'html_quote', but ignores left and right brackets
 #
 def html_quote0( line ):
    """Return 'line' with each '&' replaced by '&amp;'.

    Unlike 'html_quote', the characters '<' and '>' are left untouched.
    """
    # string method instead of the deprecated 'string.replace'
    return line.replace( "&", "&amp;" )
 def dump_html_code( lines, prefix = "" ):
    # clean the last empty lines
    #
    l = len( self.lines )
    while l > 0 and string.strip( self.lines[l - 1] ) == "":
        l = l - 1
    # The code footer should be directly appended to the last code
    # line to avoid an additional blank line.
    #
    print prefix + code_header,
    for line in self.lines[0 : l+1]:
        print '\n' + prefix + html_quote(line),
    print prefix + code_footer,
 class HtmlFormatter(Formatter):
    def __init__( self, processor, project_title, file_prefix ):
        Formatter.__init__( self, processor )
        global html_header_1, html_header_2, html_header_3, html_footer
        if file_prefix:
            file_prefix = file_prefix + "-"
        else:
            file_prefix = ""
        self.project_title = project_title
        self.file_prefix   = file_prefix
        self.html_header   = html_header_1 + project_title + html_header_2 + \
                             project_title + html_header_3
        self.html_footer = "<p><center><font size=""-2"">generated on " +   \
                            time.asctime( time.localtime( time.time() ) ) + \
                           "</font></p></center>" + html_footer
        self.columns = 3
    def  make_section_url( self, section ):
        return self.file_prefix + section.name + ".html"
    def  make_block_url( self, block ):
        return self.make_section_url( block.section ) + "#" + block.name
    def  make_html_words( self, words ):
        """ convert a series of simple words into some HTML text """
        line = ""
        if words:
            line = html_quote( words[0] )
            for w in words[1:]:
                line = line + " " + html_quote( w )
        return line
    def  make_html_word( self, word ):
        """analyze a simple word to detect cross-references and styling"""
        # look for cross-references
        #
        m = re_crossref.match( word )
        if m:
            try:
                name = m.group(1)
                block = self.identifiers[ name ]
                url   = self.make_block_url( block )
                return '<a href="' + url + '">' + name + '</a>'
            except:
                return '?' + name + '?'
        # look for italics and bolds
        m = re_italic.match( word )
        if m:
            name = m.group(1)
            return '<i>'+name+'</i>'
        m = re_bold.match( word )
        if m:
            name = m.group(1)
            return '<b>'+name+'</b>'
        return html_quote(word)
    def  make_html_para( self, words ):
        """ convert a paragraph's words into tagged HTML text, handle xrefs """
        line = ""
        if words:
            line = self.make_html_word( words[0] )
            for word in words[1:]:
                line = line + " " + self.make_html_word( word )
        return "<p>" + line + "</p>"
    def  make_html_code( self, lines ):
        """ convert a code sequence to HTML """
        line = code_header + '\n'
        for l in lines:
            line = line + html_quote( l ) + '\n'
        return line + code_footer
    def  make_html_items( self, items ):
        """ convert a field's content into some valid HTML """
        lines = []
        for item in items:
            if item.lines:
                lines.append( self.make_html_code( item.lines ) )
            else:
                lines.append( self.make_html_para( item.words ) )
        return string.join( lines, '\n' )
    def  print_html_items( self, items ):
        print self.make_html_items( items )
    def print_html_field( self, field ):
        if field.name:
            print "<table valign=top><tr><td><b>"+field.name+"</b></td><td>"
        print self.make_html_items( field.items )
        if field.name:
            print "</td></tr></table>"
    def html_source_quote( self, line, block_name = None ):
        result = ""
        while line:
            m = re_source_crossref.match( line )
            if m:
                name   = m.group(2)
                prefix = html_quote( m.group(1) )
                length = len( m.group(0) )
                if name == block_name:
                    # this is the current block name, if any
                    result = result + prefix + '<b>' + name + '</b>'
                elif re_source_keywords.match(name):
                    # this is a C keyword
                    result = result + prefix + keyword_prefix + name + keyword_suffix
                elif self.identifiers.has_key(name):
                    # this is a known identifier
                    block = self.identifiers[name]
                    result = result + prefix + '<a href="' + \
                             self.make_block_url(block) + '">' + name + '</a>'
                else:
                    result = result + html_quote(line[ : length ])
                line = line[ length : ]
            else:
                result = result + html_quote(line)
                line   = []
        return result
    def print_html_field_list( self, fields ):
        print "<table valign=top cellpadding=3>"
        for field in fields:
            print "<tr><td><b>" + field.name + "</b></td><td>"
            self.print_html_items( field.items )
            print "</td></tr>"
        print "</table>"
    def print_html_markup( self, markup ):
        table_fields = []
        for field in markup.fields:
            if field.name:
                # we begin a new series of field or value definitions, we
                # will record them in the 'table_fields' list before outputting
                # all of them as a single table
                #
                table_fields.append( field )
            else:
                if table_fields:
                    self.print_html_field_list( table_fields )
                    table_fields = []
                self.print_html_items( field.items )
        if table_fields:
            self.print_html_field_list( table_fields )
    #
    #  Formatting the index
    #
    def  index_enter( self ):
        print self.html_header
        self.index_items = {}
    def  index_name_enter( self, name ):
        block = self.identifiers[ name ]
        url   = self.make_block_url( block )
        self.index_items[ name ] = url
    def  index_exit( self ):
        # block_index already contains the sorted list of index names
        count = len( self.block_index )
        rows  = (count + self.columns - 1)/self.columns
        print "<center><table border=0 cellpadding=0 cellspacing=0>"
        for r in range(rows):
            line = "<tr>"
            for c in range(self.columns):
                i = r + c*rows
                if i < count:
                    bname = self.block_index[ r + c*rows ]
                    url   = self.index_items[ bname ]
                    line = line + '<td><a href="' + url + '">' + bname + '</a></td>'
                else:
                    line = line + '<td></td>'
            line = line + "</tr>"
            print line
        print "</table></center>"
        print self.html_footer
        self.index_items = {}
    def  index_dump( self, index_filename = None ):
        if index_filename == None:
            index_filename = self.file_prefix + "index.html"
        Formatter.index_dump( self, index_filename )
    #
    #  Formatting the table of content
    #    
    def  toc_enter( self ):
        print self.html_header
        print "<center><h1>Table of Contents</h1></center>"
    def  toc_chapter_enter( self, chapter ):
        print  chapter_header + string.join(chapter.title) + chapter_inter
        print "<table cellpadding=5>"
    def  toc_section_enter( self, section ):
        print "<tr valign=top><td>"
        print '<a href="' + self.make_section_url( section ) + '">' + \
               section.title + '</a></td><td>'
        print self.make_html_para( section.abstract )
    def  toc_section_exit( self, section ):
        print "</td></tr>"
    def  toc_chapter_exit( self, chapter ):
        print "</table>"
        print  chapter_footer
    def  toc_index( self, index_filename ):
        print chapter_header + '<a href="' + index_filename + '">Global Index</a>' + chapter_inter + chapter_footer
    def  toc_exit( self ):
        print "</table></center>"
        print self.html_footer
    def  toc_dump( self, toc_filename = None, index_filename = None ):
        if toc_filename == None:
            toc_filename = self.file_prefix + "toc.html"
        if index_filename == None:
            index_filename = self.file_prefix + "index.html"
        Formatter.toc_dump( self, toc_filename, index_filename )
    #
    #  Formatting sections
    #
    def  section_enter( self, section ):
        print self.html_header
        print section_title_header
        print section.title
        print section_title_footer
        # print section synopsys
        print section_synopsis_header
        print "<center><table cellspacing=5 cellpadding=0 border=0>"
        maxwidth = 0
        for b in section.blocks.values():
            if len(b.name) > maxwidth:
                maxwidth = len(b.name)
        width  = 130  # XXX magic number
        columns = width / maxwidth
        if columns < 1:
            columns = 1
        count   = len(section.block_names)
        rows    = (count + columns-1)/columns
        for r in range(rows):
            line = "<tr>"
            for c in range(columns):
                i = r + c*rows
                line = line + '<td></td><td>'
                if i < count:
                    name = section.block_names[i]
                    line = line + '<a href="#' + name + '">' + name + '</a>'
                line = line + '</td>'
            line = line + "</tr>"
            print line
        print "</table></center><br><br>"
        print section_synopsis_footer
        print description_header
        print self.make_html_items( section.description )
        print description_footer
    def  block_enter( self, block ):
        print block_header
        # place html anchor if needed
        if block.name:
            print '<a name="' + block.name + '">'
            print "<h4>" + block.name + "</h4>"
            print "</a>"
        # dump the block C source lines now
        if block.code:
            print source_header
            for l in block.code:
                print self.html_source_quote( l, block.name )
            print source_footer
    def  markup_enter( self, markup, block ):
        if markup.tag == "description":
            print description_header
        else:
            print marker_header + markup.tag + marker_inter
        self.print_html_markup( markup )
    def  markup_exit( self, markup, block ):
        if markup.tag == "description":
            print description_footer
        else:
            print marker_footer
    def  block_exit( self, block ):
        print block_footer
    def  section_exit( self, section ):
        print html_footer
    def section_dump_all( self ):
        for section in self.sections:
            self.section_dump( section, self.file_prefix + section.name + '.html' )
--- a/src/tools/docmaker/utils.py
+++ b/src/tools/docmaker/utils.py
@ -0,0 +1,86 @@
 import string, sys
 # This function is used to sort the index.  It is a simple lexicographical
 # sort, except that it places capital letters before lowercase ones.
 #
 def index_sort( s1, s2 ):
    """Comparison function used to sort the index.

    Return -1, 0 or 1 depending on whether 's1' sorts before, with, or
    after 's2'.  Strings are compared character by character, ignoring
    case first; when two characters only differ in case, the capital
    letter sorts first.  A string sorts after any of its proper prefixes,
    and an empty (or None) value sorts before anything else.
    """
    if not s1:
        # two empty values are equal; a comparison function must not
        # claim that a value sorts before itself
        if not s2:
            return 0
        return -1
    if not s2:
        return 1
    l1 = len( s1 )
    l2 = len( s2 )
    # string methods instead of the deprecated 'string.lower'
    m1 = s1.lower()
    m2 = s2.lower()
    for i in range( l1 ):
        # 's2' is a proper prefix of 's1', or differs case-insensitively
        if i >= l2 or m1[i] > m2[i]:
            return 1
        if m1[i] < m2[i]:
            return -1
        # same letter: capital before lowercase
        if s1[i] < s2[i]:
            return -1
        if s1[i] > s2[i]:
            return 1
    if l2 > l1:
        return -1
    return 0
 # Sort input_list, placing the elements of order_list in front.
 #
 def sort_order_list( input_list, order_list ):
    """Return a new list made of a copy of 'order_list' followed by the
    elements of 'input_list' that do not appear in 'order_list', kept in
    their original order.  Neither argument is modified.
    """
    remaining = [ item for item in input_list if item not in order_list ]
    return order_list[:] + remaining
 # current output directory
 #
 output_dir = None
 # Open the standard output to a given project documentation file.  Use
 # "output_dir" to determine the filename location if necessary and save the
 # old stdout in a tuple that is returned by this function.
 #
 def open_output( filename ):
    """Redirect the standard output to a documentation file.

    filename -- name of the file to create; when the global 'output_dir'
                is set, the file is placed in that directory

    Return the tuple '( new_file, old_stdout )', to be handed to
    'close_output' in order to close the file and restore 'sys.stdout'.
    """
    global output_dir
    # 'os' is not imported at the top of this module
    import os
    if output_dir:
        filename = output_dir + os.sep + filename
    old_stdout = sys.stdout
    new_file   = open( filename, "w" )
    sys.stdout = new_file
    return ( new_file, old_stdout )
 # Close the output that was returned by "open_output".
 #
 def close_output( output ):
    """Close the file held in 'output[0]', then make 'output[1]' the
    standard output again."""
    opened_file = output[0]
    opened_file.close()
    sys.stdout = output[1]
 # Check output directory.
 #
 def check_output( ):
    """Check the global 'output_dir' setting.

    When 'output_dir' is set, it must name an existing directory;
    otherwise an error message is written to the standard error stream
    and the program exits with status 2.  An empty 'output_dir' is
    normalized to None.
    """
    global output_dir
    # 'os' is not imported at the top of this module
    import os
    if output_dir:
        if not os.path.isdir( output_dir ):
            sys.stderr.write( "argument" + " '" + output_dir + "' " +
                              "is not a valid directory\n" )
            sys.exit( 2 )
    else:
        # covers the empty string, which the nested test of the previous
        # version could never reach
        output_dir = None