freetype2/src/tools/docwriter/content.py

#
#  content.py
#
#    Parse comment blocks to build content blocks (library file).
#
#  Copyright 2002-2018 by
#  David Turner.
#
#  This file is part of the FreeType project, and may only be used,
#  modified, and distributed under the terms of the FreeType project
#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
#  this file you indicate that you have read the license and
#  understand and accept it fully.

"""This module contains routines to parse documentation comment blocks,
building more structured objects out of them."""

from __future__ import print_function

import logging
import re

import sources
import utils

log = logging.getLogger( __name__ )

#
# Regular expressions to detect code sequences.  `Code sequences' are simply
# code fragments embedded in '```' and '```', as demonstrated in the following
# example. The language can optionally be specified on the first line after the
# backticks, and is used for syntax highlighting.
#
#   ```c
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   ```
#
# Note that the indentation of the first opening backticks and the last closing
# backticks must be exactly the same.  The code sequence itself should have a
# larger indentation than the surrounding backticks.
#
# Opening fence: group 1 is the leading indentation, group 2 the optional
# language name written directly after the backticks.
re_code_start   = re.compile( r"(\s*)```"
                              r"([\w\+\#\-]+)?"
                              r"\s*$" )
# Closing fence: group 1 is the leading indentation.
re_code_end     = re.compile( r"(\s*)```"
                              r"\s*$" )

#
# A regular expression to isolate identifiers from other text.  Two syntax
# forms are supported:
#
#   <name>
#   <name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
# index, `<id>' will be appended in parentheses.
#
# For example,
#
#   stem_darkening[autofit]
#
# becomes `stem_darkening (autofit)' in the index.
#
# Group 1 captures the complete identifier, i.e., `<name>' together with the
# optional `[<id>]' suffix.  The pattern is not anchored at its end, so
# callers using `match' get the longest identifier prefix of the text.
re_identifier = re.compile( r"""
                              ((?:\w|-)+
                               (?:\[(?:\w|-)+\])?)
                            """, re.VERBOSE )


#
# We collect macro names ending in `_H' (group 1), as defined in
# `freetype/config/ftheader.h'.  While outputting the object data, we use
# this info together with the object's file location (group 2) to emit the
# appropriate header file macro and its associated file name before the
# object itself.
#
# Example:
#
#   #define FT_FREETYPE_H <freetype.h>
#
re_header_macro = re.compile( r'^#define\s{1,}'
                              r'(\w{1,}_H)'      # group 1: macro name
                              r'\s{1,}'
                              r'<(.*)>' )        # group 2: header file name


################################################################
##
##  DOC CODE CLASS
##
##  The `DocCode' class is used to store source code lines.
##
##  `self.lines' contains a set of source code lines that will be dumped as
##  HTML in a <PRE> tag.
##
##  The object is filled line by line by the parser; it strips the leading
##  `margin' space from each input line before storing it in `self.lines'.
##
class  DocCode( object ):
    """Store the source code lines of one code sequence.

    `lines' holds the code lines after margin removal, `lang' is the
    optional language name passed by the caller, and `words' is always
    `None' for code.
    """

    def  __init__( self, margin, lines, lang = None ):
        """Build the object from `lines', removing `margin' leading columns.

        A line only loses its first `margin' characters if all of them are
        whitespace; any other line is stored unchanged.
        """
        self.lines = []
        self.words = None
        self.lang  = lang

        # remove margin spaces
        for l in lines:
            if l[:margin].strip() == "":
                l = l[margin:]
            self.lines.append( l )

    def  dump( self, prefix = "" ):
        """Print every stored line, preceded by `prefix'."""
        for l in self.dump_lines( 0 ):
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = None ):
        """Return the stored lines, each indented by `margin' spaces.

        `width' is accepted only so that this method can be called with
        the same arguments as `DocPara.dump_lines' (as `DocField.dump_lines'
        does for every item); code lines are never re-wrapped, so the
        value is ignored.
        """
        pad = " " * margin
        return [pad + l for l in self.lines]


################################################################
##
##  DOC PARA CLASS
##
##  `Normal' text paragraphs are stored in the `DocPara' class.
##
##  `self.words' contains the list of words that make up the paragraph.
##
class  DocPara( object ):
    """Store a normal text paragraph as a flat list of words.

    `words' is the list of words of the paragraph, `indent' the
    indentation recorded for it, and `lines' is always `None' for
    paragraphs.
    """

    def  __init__( self, lines, margin = -1 ):
        """Split `lines' into words and record the paragraph indentation.

        `lines' must contain at least one line.  `margin' is the
        indentation the paragraph is compared against; if it is positive
        and the first line is indented by at least four more columns, one
        empty word per extra column is put in front of the real words and
        `indent' is set to `margin' instead of the first line's own
        indentation.
        """
        self.lines  = None
        self.words  = []
        self.indent = len( lines[0] ) - len( lines[0].lstrip() )
        indent_diff = self.indent - margin

        if margin > 0 and indent_diff >= 4:
            # if the first line has an indentation >= 4,
            # add those spaces to it.
            self.words.extend( [''] * indent_diff )
            # This para is indented, the next may also be relative
            # to the parent, so set indent to margin
            self.indent = margin

        # `split' without arguments already ignores leading and trailing
        # whitespace, so no separate `strip' is needed
        for l in lines:
            self.words.extend( l.split() )

    def  dump( self, prefix = "" ):
        """Print the paragraph, re-wrapped, with `prefix' before each line."""
        for l in self.dump_lines( 0 ):
            print( prefix + l )

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph re-wrapped to at most `width' columns.

        Each returned line is preceded by `margin' spaces, which do not
        count against `width'.  A single word wider than `width' is put
        on a line of its own.
        """
        pad    = " " * margin
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1  # account for the separating space

            # Only flush if there is something on the current line;
            # otherwise an over-long first word would produce a spurious
            # empty line in front of it.
            if col > 0 and col + ln > width:
                result.append( pad + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( pad + cur )

        return result


################################################################
##
##  DOC FIELD CLASS
##
##  The `DocField' class stores a list containing either `DocPara' or
##  `DocCode' objects.  Each DocField object also has an optional `name'
##  that is used when the object corresponds to a field or value definition.
##
class  DocField( object ):
    """Hold the `DocPara' and `DocCode' items parsed from a run of lines.

    `name' is the field name, or `None' for normal paragraphs/sources;
    `items' is the list of parsed objects in source order.
    """

    def  __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        Lines matching `re_code_start' open a code sequence, which lasts
        until a line matching `re_code_end' whose indentation is not larger
        than that of the opening line.  Outside of code sequences, a blank
        line ends the current paragraph.
        """
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        mode_none  = 0     # start parsing mode
        mode_code  = 1     # parsing code sequences

        margin     = -1    # current code sequence indentation
        cur_lines  = []
        indent     = -1    # indentation of the previously stored paragraph
        lang       = None

        # analyze the markup lines to check whether they contain paragraphs,
        # code sequences, or fields definitions
        #
        mode  = mode_none

        for l in lines:
            # are we parsing a code sequence?
            if mode == mode_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence; the margin
                    # was already removed while collecting the lines
                    code = DocCode( 0, cur_lines, lang )
                    self.items.append( code )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # otherwise continue the code sequence, dropping the
                    # columns covered by the opening fence's indentation
                    cur_lines.append( l[margin:] )
            else:
                # start of code sequence?
                m = re_code_start.match( l )
                if m:
                    # save current lines
                    # NOTE(review): unlike the other paragraph call sites,
                    # this one does not pass `indent' and does not update
                    # it -- confirm that this is intended.
                    if cur_lines:
                        para = DocPara( cur_lines )
                        self.items.append( para )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    lang   = m.group( 2 )
                    mode   = mode_code
                else:
                    if not l.split() and cur_lines:
                        # if the line is empty, we end the current paragraph,
                        # if any
                        para = DocPara( cur_lines, indent )
                        self.items.append( para )
                        # store indent value of current para
                        indent = para.indent
                        cur_lines = []
                    else:
                        # otherwise, simply add the line to the current
                        # paragraph (this includes a blank line that
                        # arrives while no paragraph is open)
                        cur_lines.append( l )

        if mode == mode_code:
            # unexpected end of code sequence; the collected lines already
            # had the margin removed above, so it must not be removed a
            # second time here
            code = DocCode( 0, cur_lines, lang )
            self.items.append( code )
        elif cur_lines:
            para = DocPara( cur_lines, indent )
            self.items.append( para )

    def  dump( self, prefix = "" ):
        """Print all items, separated by empty lines, using `prefix'."""
        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0

    def  dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by empty strings.

        `margin' and `width' are handed to the `dump_lines' method of
        every item.
        """
        result = []
        nl     = None

        for p in self.items:
            if nl:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )
            nl = 1

        return result


#
# A regular expression to detect field definitions.
#
# Examples:
#
#   foo     ::
#   foo.bar ::
#
# Group 1 is the field name.  Because the first alternative is `\w*', the
# name may be empty, so a line that starts with `::' matches, too.  The
# match covers everything up to and including the `::' marker.
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )


################################################################
##
##  DOC MARKUP CLASS
##
class  DocMarkup( object ):
    """Hold the fields of one markup section.

    `tag' is the lower-cased markup tag and `fields' the list of
    `DocField' objects built from the section's lines.
    """

    def  __init__( self, tag, lines ):
        """Cut `lines' into fields at every line matching `re_field'."""
        self.tag    = tag.lower()
        self.fields = []

        pending    = []    # lines gathered for the field being built
        field_name = None  # name of that field, `None' before the first one

        for text in lines:
            hit = re_field.match( text )
            if hit is None:
                pending.append( text )
                continue

            # A new field definition starts here; store what was gathered
            # so far before switching to the new field.
            if pending:
                self.fields.append( DocField( field_name, pending ) )

            field_name = hit.group( 1 )
            # blank out the `name ::' part but keep the column positions
            covered = len( hit.group( 0 ) )
            pending = [" " * covered + text[covered:]]

        if field_name or pending:
            self.fields.append( DocField( field_name, pending ) )

    def  get_name( self ):
        """Return the first word of the first item, or `None' if missing."""
        try:
            first_item = self.fields[0].items[0]
            return first_item.words[0]
        except Exception:
            return None

    def  dump( self, margin ):
        """Print the markup enclosed in `<tag>' ... `</tag>' lines."""
        pad = " " * margin
        print( pad + "<" + self.tag + ">" )
        for field in self.fields:
            field.dump( "  " )
        print( pad + "</" + self.tag + ">" )


################################################################
##
##  DOC CHAPTER CLASS
##
class  DocChapter( object ):
    """Describe one chapter: its name, title words, and section order.

    `sections' starts out empty; this class itself never fills it.
    """

    def  __init__( self, block ):
        """Build a chapter from `block', or a catch-all chapter if falsy."""
        self.block    = block
        self.sections = []

        if not block:
            # no defining block: this is the chapter for left-over sections
            self.name  = "Other"
            self.title = ["Miscellaneous"]
            self.order = []
            return

        self.name  = block.name
        self.title = block.get_markup_words( "title" )
        self.order = block.get_markup_words( "sections" )


################################################################
##
##  DOC SECTION CLASS
##
class  DocSection( object ):
    """Collect the blocks and the description belonging to one section."""

    def  __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        self.name    = name
        self.chapter = None
        self.title   = "ERROR"  # replaced by `process' if a title is found

        self.defs        = []
        self.blocks      = {}
        self.block_names = []  # ordered block names in section

        self.abstract    = ""
        self.description = ""
        self.order       = []

    def  add_def( self, block ):
        """Remember `block' as a candidate section definition."""
        self.defs.append( block )

    def  add_block( self, block ):
        """Register `block' under its name, keeping insertion order."""
        key = block.name
        self.block_names.append( key )
        self.blocks[key] = block

    def  process( self ):
        """Take title, abstract, description, and order from the first
        definition block that has a non-empty title."""
        for candidate in self.defs:
            heading = candidate.get_markup_text( "title" )
            if not heading:
                continue

            self.title       = heading
            self.abstract    = candidate.get_markup_words( "abstract" )
            self.description = candidate.get_markup_items( "description" )
            self.order       = candidate.get_markup_words_all( "order" )
            break

    def  reorder( self ):
        """Re-sort `block_names' with `utils.sort_order_list' and `order'."""
        self.block_names = utils.sort_order_list( self.block_names,
                                                  self.order )


################################################################
##
##  CONTENT PROCESSOR CLASS
##
class  ContentProcessor( object ):
    """Turn documentation comment blocks into markups, sections, and
    chapters.

    `sections' maps section names to `DocSection' objects, `section' is
    the section currently being filled, `chapters' is the list of
    `DocChapter' objects, and `headers' is a dictionary of header macros
    that is filled from outside this class.
    """

    def  __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def  set_section( self, section_name ):
        """Set current section during parsing.

        The section is created and registered first if `section_name' has
        not been seen before.
        """
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def  add_chapter( self, block ):
        """Append a new chapter built from `block'."""
        chapter = DocChapter( block )
        self.chapters.append( chapter )

    def  reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def  add_markup( self ):
        """Add a new markup section.

        Nothing happens unless both a current markup tag and at least one
        collected line are present.
        """
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                self.markup_lines = marks[:-1]

            m = DocMarkup( self.markup, self.markup_lines )

            self.markups.append( m )

            self.markup       = None
            self.markup_lines = []

    def  process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
        corresponding to it.

        Markup tags are not searched for inside of code sequences.  Lines
        in front of the first markup tag are dropped.
        """
        first   = True   # no markup tag seen yet

        margin  = -1     # indentation of the opening code fence
        in_code = False

        for line in content:
            # keep track of code sequences
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    in_code = False
                    margin  = -1
            else:
                m = re_code_start.match( line )
                if m:
                    in_code = True
                    margin  = len( m.group( 1 ) )

            found = None

            if not in_code:
                for t in sources.re_markup_tags:
                    m = t.match( line )
                    if m:
                        found  = m.group( 1 ).lower()
                        prefix = len( m.group( 0 ) )
                        # remove markup from line
                        line   = " " * prefix + line[prefix:]
                        break

            # is it the start of a new markup section ?
            if found:
                first = False
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif not first:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def  parse_sources( self, source_processor ):
        """Create a `DocBlock' for each documentation comment found in
        `source_processor.blocks'."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n, source in enumerate( blocks ):
            if source.content:
                # this is a documentation comment, we need to catch
                # all following normal blocks in the "follow" list
                #
                follow = []
                m = n + 1
                while m < count and not blocks[m].content:
                    follow.append( blocks[m] )
                    m = m + 1

                # the constructor registers the new block with `self'
                DocBlock( source, follow, self )

    def  finish( self ):
        """Process all sections and attach each of them to a chapter.

        Sections not listed by any chapter are collected in an additional
        chapter that is appended to `chapters'.
        """
        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    # `warn' is a deprecated alias of `warning'
                    log.warning( "Chapter '%s' in %s"
                        " lists unknown section '%s'",
                        chap.name, chap.block.location(), sec )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            # Assign the chapter to all sections
            for section in others:
                section.chapter = chap
            chap.sections = others
            self.chapters.append( chap )


################################################################
##
##  DOC BLOCK CLASS
##
class  DocBlock( object ):
    """One documentation comment block together with the source code lines
    that follow it."""

    def  __init__( self, source, follow, processor ):
        """Parse `source', register the block with `processor', and collect
        the code lines found in the `follow' blocks.

        `type' and `name' keep the placeholders "ERRTYPE" and "ERRNAME" if
        they cannot be derived from the first markup.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        # this is the section that was current before this block was seen
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # the tag of the first markup gives the block type
        try:
            self.type = self.markups[0].tag
        except Exception:
            pass

        # the first word of the first paragraph gives the block name
        try:
            first_word = self.markups[0].fields[0].items[0].words[0]
            ident      = re_identifier.match( first_word )
            self.name  = ident.group( 1 ) if ident else first_word
        except Exception:
            pass

        if self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        elif self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        else:
            processor.section.add_block( self )

        # Gather the source lines relevant to this documentation block,
        # stopping at the first following block that has a format.
        collected = []
        for chunk in follow:
            if chunk.format:
                break
            for text in chunk.lines:
                # collect header macro definitions
                macro = re_header_macro.match( text )
                if macro:
                    processor.headers[macro.group( 2 )] = macro.group( 1 )

                # a separator line ends the lines taken from this chunk
                if sources.re_source_sep.match( text ):
                    break
                collected.append( text )

        # keep everything from the first to the last non-blank line
        solid = [pos for pos, text in enumerate( collected ) if text.strip()]
        if solid:
            self.code = collected[solid[0]:solid[-1] + 1]
        else:
            self.code = []

    def  location( self ):
        """Return the location reported by the underlying source block."""
        return self.source.location()

    def  get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block."""
        return next( ( markup for markup in self.markups
                       if markup.tag == tag_name.lower() ),
                     None )

    def  get_markup_words( self, tag_name ):
        """Return the words of the first item of the markup's first field,
        or an empty list if anything is missing."""
        try:
            return self.get_markup( tag_name ).fields[0].items[0].words
        except Exception:
            return []

    def  get_markup_words_all( self, tag_name ):
        """Return the words of all items of the markup's first field, or
        an empty list if anything is missing."""
        try:
            collected = []
            for entry in self.get_markup( tag_name ).fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                collected.extend( entry.words )
                collected.append( "/empty/" )
            return collected
        except Exception:
            return []

    def  get_markup_text( self, tag_name ):
        """Return the words of `get_markup_words' joined by single spaces."""
        return " ".join( self.get_markup_words( tag_name ) )

    def  get_markup_items( self, tag_name ):
        """Return the items of the markup's first field, or `None' if
        anything is missing."""
        try:
            return self.get_markup( tag_name ).fields[0].items
        except Exception:
            return None

# eof