690 lines
21 KiB
Python
690 lines
21 KiB
Python
#
|
|
# content.py
|
|
#
|
|
# Parse comment blocks to build content blocks (library file).
|
|
#
|
|
# Copyright 2002-2018 by
|
|
# David Turner.
|
|
#
|
|
# This file is part of the FreeType project, and may only be used,
|
|
# modified, and distributed under the terms of the FreeType project
|
|
# license, LICENSE.TXT. By continuing to use, modify, or distribute
|
|
# this file you indicate that you have read the license and
|
|
# understand and accept it fully.
|
|
|
|
"""This module contains routines to parse documentation comment blocks,
|
|
building more structured objects out of them."""
|
|
|
|
from __future__ import print_function
|
|
|
|
import logging
|
|
import re
|
|
|
|
import sources
|
|
import utils
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


#
# Regular expressions to detect code sequences.  A `code sequence' is a
# code fragment enclosed between two lines of three backticks, as shown in
# the example below.  The language can optionally be given directly after
# the opening backticks; it is captured as group 2 of `re_code_start'.
#
#   ```c
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   ```
#
# Group 1 of both expressions captures the indentation in front of the
# backticks.  The parsers in this file only accept a closing line whose
# indentation is not larger than the one of the opening line, so the code
# itself should be indented deeper than the surrounding backticks.
#
re_code_start = re.compile(r"(\s*)```([\w\+\#\-]+)?\s*$")
re_code_end = re.compile(r"(\s*)```\s*$")


#
# A regular expression to isolate identifiers from other text.  Two syntax
# forms are supported:
#
#   <name>
#   <name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
# index, `<id>' will be appended in parentheses.
#
# For example,
#
#   stem_darkening[autofit]
#
# becomes `stem_darkening (autofit)' in the index.
#
re_identifier = re.compile(
    r"""
    ((?:\w|-)+
     (?:\[(?:\w|-)+\])?)
    """,
    re.VERBOSE)


#
# We collect macro names ending in `_H' (group 1), as defined in
# `freetype/config/ftheader.h', together with the header file name given
# between angle brackets (group 2).  This information is used while
# outputting the object data to emit the appropriate header file macro and
# its associated file name before the object itself.
#
# Example:
#
#   #define FT_FREETYPE_H  <freetype.h>
#
re_header_macro = re.compile(r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>')
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC CODE CLASS
|
|
##
|
|
## The `DocCode' class is used to store source code lines.
|
|
##
|
|
## `self.lines' contains a set of source code lines that will be dumped as
|
|
## HTML in a <PRE> tag.
|
|
##
|
|
## The object is filled line by line by the parser; it strips the leading
|
|
## `margin' space from each input line before storing it in `self.lines'.
|
|
##
|
|
class DocCode(object):
    """Store the source lines of a single code sequence.

    Attributes:
      lines -- the code lines, with the leading margin removed where
               possible.
      words -- always `None'; present so that code items expose the same
               attribute names as `DocPara' objects.
      lang  -- the language name passed by the caller, or `None'.
    """

    def __init__(self, margin, lines, lang=None):
        """Build the object from `lines', stripping `margin' columns.

        The first `margin' characters of a line are removed only if they
        consist entirely of whitespace; other lines are stored unchanged.
        """
        self.lines = []
        self.words = None
        self.lang = lang

        for line in lines:
            # Never cut into real text: under-indented lines are kept
            # as they are.
            if line[:margin].strip() == "":
                line = line[margin:]
            self.lines.append(line)

    def dump(self, prefix=""):
        """Print all stored lines, each one preceded by `prefix'."""
        for line in self.dump_lines(0):
            print(prefix + line)

    def dump_lines(self, margin=0, width=60):
        """Return the stored lines, each indented by `margin' spaces.

        `width' is accepted for compatibility with `DocPara.dump_lines',
        because `DocField.dump_lines' calls both with the same arguments.
        It is ignored here since code lines are never re-wrapped.
        """
        pad = " " * margin
        return [pad + line for line in self.lines]
|
|
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC PARA CLASS
|
|
##
|
|
## `Normal' text paragraphs are stored in the `DocPara' class.
|
|
##
|
|
## `self.words' contains the list of words that make up the paragraph.
|
|
##
|
|
class DocPara(object):
    """Store a `normal' text paragraph as a list of words.

    Attributes:
      lines  -- always `None'; present so that paragraphs expose the same
                attribute names as `DocCode' objects.
      words  -- the whitespace-separated words of the paragraph.  If the
                paragraph is indented relative to `margin' (see
                `__init__'), the list starts with one empty string per
                extra column.
      indent -- the indentation of the first line, or `margin' if the
                paragraph was recognized as indented.
    """

    def __init__(self, lines, margin=-1):
        """Split `lines' into words.

        `lines' must contain at least one line.  `margin' is the
        indentation to compare the first line against; the default of -1
        disables the comparison.
        """
        self.lines = None
        self.words = []

        head = lines[0]
        self.indent = len(head) - len(head.lstrip())
        extra = self.indent - margin

        if margin > 0 and extra >= 4:
            # The first line is indented by at least four columns relative
            # to `margin': record those columns as empty words.
            self.words.extend([''] * extra)
            # This paragraph is indented; the next one may also be relative
            # to the parent, so report `margin' as our indentation.
            self.indent = margin

        for line in lines:
            self.words.extend(line.split())

    def dump(self, prefix=""):
        """Print the wrapped paragraph, each line preceded by `prefix'."""
        for line in self.dump_lines(0):
            print(prefix + line)

    def dump_lines(self, margin=0, width=60):
        """Return the paragraph wrapped to `width' columns.

        Every returned line is indented by `margin' spaces (not counted in
        `width').  A word longer than `width' is put on a line of its own.
        """
        pad = " " * margin
        result = []
        cur = ""    # text of the line being built
        col = 0     # its length; zero means that the line is still empty

        for word in self.words:
            needed = len(word)
            if col > 0:
                needed += 1     # separating space

            # Only wrap if there is something to flush; otherwise an
            # over-long first word would produce a spurious empty line.
            if col > 0 and col + needed > width:
                result.append(pad + cur)
                cur = word
                col = len(word)
            else:
                if col > 0:
                    cur += " "
                cur += word
                col += needed

        if col > 0:
            result.append(pad + cur)

        return result
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC FIELD CLASS
|
|
##
|
|
## The `DocField' class stores a list containing either `DocPara' or
|
|
## `DocCode' objects. Each DocField object also has an optional `name'
|
|
## that is used when the object corresponds to a field or value definition.
|
|
##
|
|
class DocField(object):
    """Store the content of a field as a list of paragraphs and code.

    Attributes:
      name  -- the field name; can be `None' for normal paragraphs/sources.
      items -- list of `DocPara' and `DocCode' objects, in input order.
    """

    def __init__(self, name, lines):
        """Split `lines' into paragraphs and code sequences.

        Paragraphs are separated by empty lines; code sequences are
        delimited by lines matching `re_code_start' and `re_code_end'.
        """
        self.name = name
        self.items = []

        in_code = False     # are we inside a code sequence?
        margin = -1         # indentation of the opening backticks
        cur_lines = []      # lines collected for the current item
        indent = -1         # indentation reported by the last paragraph
        lang = None         # language of the current code sequence

        for l in lines:
            if in_code:
                m = re_code_end.match(l)
                if m and len(m.group(1)) <= margin:
                    # That's it, we finished the code sequence.  The margin
                    # has already been removed from the collected lines.
                    self.items.append(DocCode(0, cur_lines, lang))
                    margin = -1
                    cur_lines = []
                    in_code = False
                else:
                    # otherwise continue the code sequence
                    cur_lines.append(l[margin:])
                continue

            m = re_code_start.match(l)
            if m:
                # A code sequence starts: flush the pending paragraph.
                if cur_lines:
                    self.items.append(DocPara(cur_lines))
                    cur_lines = []

                margin = len(m.group(1))
                lang = m.group(2)
                in_code = True
            elif not l.split() and cur_lines:
                # An empty line ends the current paragraph; remember its
                # indentation for the paragraphs that follow.
                para = DocPara(cur_lines, indent)
                self.items.append(para)
                indent = para.indent
                cur_lines = []
            else:
                # otherwise, simply add the line to the current paragraph
                cur_lines.append(l)

        if in_code:
            # Unexpected end of code sequence.  As above, the collected
            # lines are already stripped of `margin' columns, so we must
            # not ask `DocCode' to strip them a second time.
            self.items.append(DocCode(0, cur_lines, lang))
        elif cur_lines:
            self.items.append(DocPara(cur_lines, indent))

    def dump(self, prefix=""):
        """Print all items, separated by empty lines."""
        for idx, item in enumerate(self.items):
            if idx:
                print("")
            item.dump(prefix)

    def dump_lines(self, margin=0, width=60):
        """Return the lines of all items, separated by empty strings.

        `margin' and `width' are passed on to the `dump_lines' method of
        every item.
        """
        result = []
        for idx, item in enumerate(self.items):
            if idx:
                result.append("")
            result.extend(item.dump_lines(margin, width))
        return result
|
|
|
|
|
|
#
|
|
# A regular expression to detect field definitions.
|
|
#
|
|
# Examples:
|
|
#
|
|
# foo ::
|
|
# foo.bar ::
|
|
#
|
|
# Group 1 is the field name: either a plain word or several words joined
# by dots.  The match includes the `::' marker that follows the name.
re_field = re.compile(
    r"""
    \s*
      (
        \w*
      |
        \w (\w | \.)* \w
      )
    \s* ::
    """,
    re.VERBOSE)
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC MARKUP CLASS
|
|
##
|
|
class DocMarkup(object):
    """Store the content of one markup section as a list of fields.

    Attributes:
      tag    -- the markup tag, converted to lowercase.
      fields -- list of `DocField' objects, in input order.
    """

    def __init__(self, tag, lines):
        """Split `lines' into fields.

        A line matching `re_field' starts a new field; all other lines are
        added to the field currently being collected.  Lines in front of
        the first field definition end up in a field without a name.
        """
        self.tag = tag.lower()
        self.fields = []

        pending = []        # lines of the field being collected
        field_name = None   # its name, if any

        for line in lines:
            m = re_field.match(line)
            if m is None:
                pending.append(line)
                continue

            # A new field definition starts; save the current one first.
            if pending:
                self.fields.append(DocField(field_name, pending))

            field_name = m.group(1)
            # Blank out the field name and the `::' marker while keeping
            # the column positions of the remaining text.
            skip = len(m.group(0))
            pending = [" " * skip + line[skip:]]

        if field_name or pending:
            self.fields.append(DocField(field_name, pending))

    def get_name(self):
        """Return the first word of the first item of the first field.

        Return `None' if that word cannot be accessed.
        """
        try:
            return self.fields[0].items[0].words[0]
        except Exception:
            return None

    def dump(self, margin):
        """Print the fields enclosed in `<tag>' and `</tag>' lines."""
        pad = " " * margin
        print(pad + "<" + self.tag + ">")
        for field in self.fields:
            field.dump(" ")
        print(pad + "</" + self.tag + ">")
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC CHAPTER CLASS
|
|
##
|
|
class DocChapter(object):
    """Describe a chapter, i.e., an ordered group of sections.

    Attributes:
      block    -- the block the chapter was created from, or a false value
                  for the catch-all chapter.
      sections -- list of sections in this chapter; initially empty.
      name     -- the chapter name.
      title    -- the chapter title as a list of words.
      order    -- the words of the block's `sections' markup.
    """

    def __init__(self, block):
        self.block = block
        self.sections = []

        if not block:
            # No defining block: this is the chapter that collects all
            # remaining sections.
            self.name = "Other"
            self.title = ["Miscellaneous"]
            self.order = []
            return

        self.name = block.name
        self.title = block.get_markup_words("title")
        self.order = block.get_markup_words("sections")
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC SECTION CLASS
|
|
##
|
|
class DocSection(object):
    """Describe a documentation section and the blocks it contains.

    Attributes:
      name        -- the section name.
      blocks      -- dictionary mapping block names to blocks.
      block_names -- ordered block names in section.
      defs        -- blocks added with `add_def'; `process' searches them
                     for the section description.
      abstract, description, order, title
                  -- filled in by `process'; until then they hold the
                     placeholder values set in `__init__'.
      chapter     -- the chapter this section belongs to, or `None'.
    """

    def __init__(self, name="Other"):
        self.name = name
        self.blocks = {}
        self.block_names = []
        self.defs = []
        self.abstract = ""
        self.description = ""
        self.order = []
        self.title = "ERROR"
        self.chapter = None

    def add_def(self, block):
        """Register `block' as a candidate section description."""
        self.defs.append(block)

    def add_block(self, block):
        """Add `block' to the section, recording its name for ordering."""
        self.block_names.append(block.name)
        self.blocks[block.name] = block

    def process(self):
        """Take the section data from the first definition with a title.

        Definitions without a `title' markup text are skipped.  If no
        definition has one, the section is left unchanged.
        """
        for block in self.defs:
            title = block.get_markup_text("title")
            if not title:
                continue

            self.title = title
            self.abstract = block.get_markup_words("abstract")
            self.description = block.get_markup_items("description")
            self.order = block.get_markup_words_all("order")
            return

    def reorder(self):
        """Reorder `block_names' with `utils.sort_order_list'."""
        self.block_names = utils.sort_order_list(self.block_names,
                                                 self.order)
|
|
|
|
|
|
################################################################
|
|
##
|
|
## CONTENT PROCESSOR CLASS
|
|
##
|
|
class ContentProcessor(object):
    """Build sections, chapters, and markup objects from source blocks.

    Attributes:
      sections -- dictionary of documentation sections, indexed by name.
      section  -- current documentation section, or `None'.
      chapters -- list of chapters.
      headers  -- dictionary of header macros.
      markups, markup, markup_lines
               -- parsing state of the block being processed; see `reset'.
    """

    def __init__(self):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}
        self.section = None

        self.chapters = []

        self.headers = {}

    def set_section(self, section_name):
        """Set current section during parsing.

        The section is created first if it doesn't exist yet.
        """
        if section_name not in self.sections:
            self.sections[section_name] = DocSection(section_name)

        self.section = self.sections[section_name]

    def add_chapter(self, block):
        """Append a new chapter defined by `block'."""
        self.chapters.append(DocChapter(block))

    def reset(self):
        """Reset the content processor for a new block."""
        self.markups = []
        self.markup = None
        self.markup_lines = []

    def add_markup(self):
        """Add a new markup section.

        Nothing happens unless both a markup tag and markup lines have
        been collected.
        """
        if self.markup and self.markup_lines:
            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if len(marks) > 0 and not marks[-1].strip():
                self.markup_lines = marks[:-1]

            self.markups.append(DocMarkup(self.markup, self.markup_lines))

            self.markup = None
            self.markup_lines = []

    def process_content(self, content):
        """Process a block content and return a list of DocMarkup objects
        corresponding to it."""
        seen_tag = False    # lines before the first markup tag are dropped
        in_code = False     # markup tags are not searched in code sequences
        margin = -1         # indentation of the opening backticks

        for line in content:
            if in_code:
                m = re_code_end.match(line)
                if m and len(m.group(1)) <= margin:
                    in_code = False
                    margin = -1
            else:
                m = re_code_start.match(line)
                if m:
                    in_code = True
                    margin = len(m.group(1))

            found = None

            if not in_code:
                for tag_re in sources.re_markup_tags:
                    m = tag_re.match(line)
                    if m:
                        found = m.group(1).lower()
                        # Remove the markup from the line while keeping
                        # the column positions of the remaining text.
                        prefix = len(m.group(0))
                        line = " " * prefix + line[prefix:]
                        break

            if found:
                # start of a new markup section
                seen_tag = True
                self.add_markup()   # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append(line)
            elif seen_tag:
                self.markup_lines.append(line)

        self.add_markup()

        return self.markups

    def parse_sources(self, source_processor):
        """Create a `DocBlock' for every documentation comment block.

        All blocks without content that directly follow a documentation
        comment are passed to the `DocBlock' constructor as its `follow'
        list.
        """
        blocks = source_processor.blocks
        count = len(blocks)

        for n, source in enumerate(blocks):
            if not source.content:
                continue

            follow = []
            m = n + 1
            while m < count and not blocks[m].content:
                follow.append(blocks[m])
                m += 1

            # The constructor registers the block with this processor.
            DocBlock(source, follow, self)

    def finish(self):
        """Finalize sections and assign them to chapters.

        Sections not listed in any chapter are collected in an additional
        chapter created with `DocChapter( None )'.
        """
        # process all sections to extract their abstract, description
        # and ordered list of items
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append(section)
                else:
                    # `Logger.warn' is a deprecated alias that newer
                    # Python versions no longer provide.
                    log.warning("Chapter '%s' in %s"
                                " lists unknown section '%s'",
                                chap.name, chap.block.location(), sec)

        # check that all sections are in a chapter
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append(sec)

        # create a new special chapter for all remaining sections
        # when necessary
        if others:
            chap = DocChapter(None)
            for section in others:
                section.chapter = chap
            chap.sections = others
            self.chapters.append(chap)
|
|
|
|
|
|
################################################################
|
|
##
|
|
## DOC BLOCK CLASS
|
|
##
|
|
class DocBlock(object):
    """Describe one documentation block and the source code following it.

    Attributes:
      source  -- the source block holding the documentation comment.
      code    -- the source lines documented by the block, without leading
                 and trailing empty lines.
      type    -- the tag of the first markup, or "ERRTYPE".
      name    -- the block name taken from the first markup paragraph, or
                 "ERRNAME".
      section -- the processor's current section at construction time.
      markups -- list of markup objects returned by the processor.
    """

    def __init__(self, source, follow, processor):
        """Parse `source' and register the new block with `processor'.

        `follow' is the list of blocks following the documentation
        comment; their lines are searched for header macros and collected
        in `self.code'.
        """
        processor.reset()

        self.source = source
        self.code = []
        self.type = "ERRTYPE"
        self.name = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content(source.content)

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except Exception:
            pass

        # compute block name from first markup paragraph
        try:
            first_para = self.markups[0].fields[0].items[0]
            name = first_para.words[0]
            m = re_identifier.match(name)
            if m:
                name = m.group(1)
            self.name = name
        except Exception:
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section(self.name)
            processor.section.add_def(self)
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter(self)
        else:
            processor.section.add_block(self)

        # now, compute the source lines relevant to this documentation
        # block; normal comments are kept
        code_lines = []
        for blk in follow:
            if blk.format:
                break

            for line in blk.lines:
                # collect header macro definitions
                m = re_header_macro.match(line)
                if m:
                    processor.headers[m.group(2)] = m.group(1)

                # A source separator line ends the lines taken from this
                # block.
                if sources.re_source_sep.match(line):
                    break

                code_lines.append(line)

        # now strip the leading and trailing empty lines from the sources
        first = 0
        last = len(code_lines) - 1

        while first < last and not code_lines[first].strip():
            first += 1

        while first < last and not code_lines[last].strip():
            last -= 1

        only_blank = first == last and not code_lines[first].strip()
        self.code = [] if only_blank else code_lines[first:last + 1]

    def location(self):
        """Return the location reported by the underlying source block."""
        return self.source.location()

    def get_markup(self, tag_name):
        """Return the DocMarkup corresponding to a given tag in a block.

        Return `None' if there is no such markup.
        """
        return next((m for m in self.markups
                     if m.tag == tag_name.lower()), None)

    def get_markup_words(self, tag_name):
        """Return the words of the first item of the markup's first field.

        Return an empty list if they cannot be accessed.
        """
        try:
            return self.get_markup(tag_name).fields[0].items[0].words
        except Exception:
            return []

    def get_markup_words_all(self, tag_name):
        """Return the words of all items of the markup's first field.

        We honour empty lines in an `<Order>' section element by adding
        the sentinel `/empty/' after the words of every item.  The
        formatter should then convert it to an appropriate representation
        in the `section_enter' function.

        Return an empty list if the words cannot be accessed.
        """
        try:
            words = []
            for item in self.get_markup(tag_name).fields[0].items:
                words.extend(item.words)
                words.append("/empty/")
            return words
        except Exception:
            return []

    def get_markup_text(self, tag_name):
        """Return the result of `get_markup_words' joined by spaces."""
        return " ".join(self.get_markup_words(tag_name))

    def get_markup_items(self, tag_name):
        """Return the items of the markup's first field.

        Return `None' if they cannot be accessed.
        """
        try:
            return self.get_markup(tag_name).fields[0].items
        except Exception:
            return None
|
|
|
|
# eof
|