# File: vpp/docs/_scripts/siphon/process.py
# (Repository viewer listing: 424 lines, 13 KiB, Python.)

# Copyright (c) 2016 Comcast Cable Communications Management, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generation template class
import html.parser
import json
import logging
import os
import sys
import re
import jinja2
# Registries: classes register themselves in these dictionaries.

# Mapping of known processors to their classes
siphons = dict()

# Mapping of known output formats to their classes
formats = dict()
class Siphon(object):
    """Generate rendered output for siphoned data.

    The class loads one or more JSON input files (``load_json``), regroups
    the parsed items, and then renders an index plus item bodies through
    Jinja2 templates (``process``). The small rendering hooks
    (``index_*``, ``item_*``, ``page_*``, ``group_key`` ...) are ordinary
    methods so they can be overridden.
    """

    # Our siphon name. Set by subclasses.
    name = None

    # Name of an identifier used by this siphon. Set by subclasses.
    identifier = None

    # The pyparsing object to use to parse with. Set by subclasses.
    # Only its ``parse(block)`` method is called here.
    _parser = None

    # The input data: group key -> item key -> parsed item, plus the
    # reserved "_global" entry holding the global data of the input files.
    _cmds = None

    # Group key to (directory, file) mapping
    _group = None

    # Logging handler
    log = None

    # Directory to look for siphon rendering templates
    template_directory = None

    # Directory to output parts in
    outdir = None

    # Template environment, if we're using templates
    _tplenv = None

    # Instance of the selected output format class (set in __init__)
    _format = None

    def __init__(self, template_directory, format, outdir, repository_link):
        """Set up logging, the output format and the template environment.

        :param template_directory: root directory of the rendering templates.
        :param format: key into the module-level ``formats`` registry; an
            unknown key raises ``KeyError``.
        :param outdir: directory in which ``process`` writes separate pages.
        :param repository_link: stored unchanged as ``self.repository_link``.
        """
        super().__init__()
        self.log = logging.getLogger("siphon.process.%s" % self.name)

        # Get our output format details
        fmt = formats[format]()
        self._format = fmt

        self.template_directory = template_directory
        self.outdir = outdir

        # Sort out the template search path: templates specific to this
        # siphon take precedence over the "default" ones. os.path.join keeps
        # the path relative when template_directory is empty.
        searchpath = [
            os.path.join(template_directory, fmt.name, subdir)
            for subdir in (self.name, "default")
        ]
        self._tplenv = jinja2.Environment(
            loader=jinja2.FileSystemLoader(searchpath=searchpath),
            trim_blocks=True,
            autoescape=False,
            keep_trailing_newline=True,
        )

        # Convenience, get a reference to the escape and unescape helpers
        # of the html module. These then become available to templates to
        # use, if needed (templates receive this object as ``this``).
        self._h = html.parser.HTMLParser()
        self.escape = html.escape
        self.unescape = html.unescape

        # TODO: customize release
        self.repository_link = repository_link

    # Output renderers

    def index_sort_key(self, group):
        """Return an object to be used as the sorting key in the item index."""
        return group

    def index_header(self):
        """Return a string to use as the header at the top of the item index."""
        return self.template("index_header")

    def index_section(self, group):
        """Return the string fragment to use for each section in the item
        index."""
        return self.template("index_section", group=group)

    def index_entry(self, meta, item):
        """Return the string fragment to use for each entry in the item index."""
        return self.template("index_entry", meta=meta, item=item)

    def item_sort_key(self, item):
        """Return an object, typically a string, to be used as the sorting key
        for items within a section."""
        return item["name"]

    def group_key(self, directory, file, macro, name):
        """Return a key for grouping items together.

        A file that has a ``group_label`` in the global data forms its own
        group; every other item is grouped by its directory. The chosen key
        is also recorded in ``self._group``.
        """
        _global = self._cmds["_global"]
        if file in _global and "group_label" in _global[file]:
            self._group[file] = (directory, file)
            return file
        self._group[directory] = (directory, None)
        return directory

    def item_key(self, directory, file, macro, name):
        """Return a key for identifying items within a grouping."""
        return name

    def item_header(self, group):
        """Return a string to use as the header when rendering the item."""
        return self.template("item_header", group=group)

    def item_format(self, meta, item):
        """Return a string to use as the body when rendering the item."""
        return self.template("item_format", meta=meta, item=item)

    def page_label(self, group):
        """Return a string to use as the label for the page reference."""
        return "_".join((self.name, self.sanitize_label(group)))

    def page_title(self, group):
        """Return a title to use for a page.

        Preference order: the ``group_label`` of the group's file, then the
        ``group_label`` of its directory, then the directory name itself.
        """
        _global = self._cmds["_global"]
        directory, file = self._group[group]
        for scope in (file, directory):
            # ``file`` is None for directory-level groups
            if scope and scope in _global and "group_label" in _global[scope]:
                return _global[scope]["group_label"]
        return directory

    def item_label(self, group, item):
        """Return a string to use as the label for the section reference.

        ``group`` is accepted for interface symmetry but not used here.
        """
        return "__".join((self.name, item))

    def sanitize_label(self, value):
        """Label sanitizer; for creating Doxygen references.

        Replaces every space, slash and dot with an underscore.
        """
        return re.sub(r"[ /.]", "_", value)

    def template(self, name, **kwargs):
        """Template processor.

        Renders the template ``name`` + the format's file extension, passing
        this object as ``this`` along with ``kwargs``.
        """
        tpl = self._tplenv.get_template(name + self._format.extension)
        return tpl.render(this=self, **kwargs)

    # Processing methods

    def load_json(self, files):
        """Parse the input files into a more usable dictionary structure.

        Resets ``self._cmds`` and ``self._group``, then for every file merges
        its ``"global"`` mapping into ``self._cmds["_global"]`` and stores
        each parsed item under ``self._cmds[group_key][item_key]``.

        :param files: iterable of paths to JSON files, each with a
            ``"global"`` mapping and an ``"items"`` list.
        :raises Exception: whatever the parser raises for an item; the
            offending item is logged before the exception is re-raised.
        """
        # Globals of all files are merged (later files win on equal keys) so
        # that labels from earlier files are still known when rendering.
        self._cmds = {"_global": {}}
        self._group = {}

        for filename in files:
            filename = os.path.relpath(filename)
            self.log.info('Parsing items in file "%s".', filename)
            with open(filename, "r", encoding="utf-8") as fd:
                data = json.load(fd)

            self._cmds["_global"].update(data["global"])

            # iterate the items loaded and regroup it
            for item in data["items"]:
                try:
                    o = self._parser.parse(item["block"])
                except Exception:
                    self.log.error(
                        "Exception parsing item: %s\n%s",
                        json.dumps(item, separators=(",", ": "), indent=4),
                        item["block"],
                    )
                    raise

                # Augment the item with metadata (everything but the raw block)
                o["meta"] = {
                    key: value for key, value in item.items() if key != "block"
                }

                # Load some interesting fields
                directory = item["directory"]
                file = item["file"]
                macro = o["macro"]
                name = o["name"]

                # Generate keys to group items by
                group_key = self.group_key(directory, file, macro, name)
                item_key = self.item_key(directory, file, macro, name)

                self._cmds.setdefault(group_key, {})[item_key] = o

    def process(self, out=None):
        """Iterate over the input data, calling render methods to generate the
        output.

        Index fragments are written to ``out`` immediately. Item bodies are
        accumulated; after each group they are written to a separate page
        file when ``separate_page_names`` returns a name, and whatever is
        left at the end is written to ``out``.

        :param out: object with a ``write`` method; defaults to ``sys.stdout``.
        """
        if out is None:
            out = sys.stdout

        # Accumulated body fragments since the last flush
        contents = []

        # Write the header for this siphon type
        out.write(self.index_header())

        # Keys starting with "_" (e.g. "_global") are internal, not groups;
        # drop them before sorting so sort keys only ever see real groups.
        groups = [group for group in self._cmds if not group.startswith("_")]

        for group in sorted(groups, key=self.index_sort_key):
            self.log.info(
                'Processing items in group "%s" (%s).',
                group,
                self.index_sort_key(group),
            )

            # Generate the section index entry (write it now)
            out.write(self.index_section(group))

            # Generate the item header (save for later)
            contents.append(self.item_header(group))

            items = self._cmds[group]
            for key in sorted(items, key=lambda k: self.item_sort_key(items[k])):
                o = items[key]
                self.log.debug(
                    '--- Processing key "%s" (%s).', key, self.item_sort_key(o)
                )
                meta = {
                    "directory": o["meta"]["directory"],
                    "file": o["meta"]["file"],
                    "macro": o["macro"],
                    "name": o["name"],
                    "key": key,
                    "label": self.item_label(group, key),
                }

                # Generate the index entry for the item (write it now)
                out.write(self.index_entry(meta, o))

                # Generate the item itself (save for later)
                contents.append(self.item_format(meta, o))

            # An empty (falsy) page name means "no separate page": keep
            # accumulating and deliver the body through ``out`` at the end.
            page_name = self.separate_page_names(group)
            if page_name:
                path = os.path.join(self.outdir, page_name)
                with open(path, "w", encoding="utf-8") as page:
                    page.write("".join(contents))
                contents = []

        # Deliver the accumulated body output
        out.write("".join(contents))

    def do_cliexstart(self, matchobj):
        """Render a ``@cliexstart{title}...@cliexend`` match as a console
        code block; the title is folded onto one line."""
        title = " ".join(matchobj.group(1).splitlines())
        content = matchobj.group(2).replace("\n", "\n ")
        return "\n\n.. code-block:: console\n\n %s\n %s\n\n" % (title, content)

    def do_clistart(self, matchobj):
        """Render a ``@clistart...@cliend`` match as a console code block."""
        content = matchobj.group(1).replace("\n", "\n ")
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def do_cliexcmd(self, matchobj):
        """Render a ``@cliexcmd{...}`` match as a one-line console code
        block."""
        content = " ".join(matchobj.group(1).splitlines())
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def process_list(self, matchobj):
        """Re-indent one list item and terminate it with the temporary
        ``BBBB`` marker that ``process_special`` resolves afterwards."""
        content = self.reindent(matchobj.group(1), 2)
        return "@@@@%s\nBBBB" % content

    def process_special(self, s):
        """Translate the special markers and inline HTML in ``s``.

        The substitutions are order dependent: markers are removed first,
        then emphasis is converted, then code blocks, and finally lists
        (which use the temporary ``@@@@`` / ``BBBB`` markers).
        """
        simple_substitutions = (
            # ----------- markers to remove
            (r"@cliexpar\s*", ""),
            (r"@parblock\s*", ""),
            (r"@endparblock\s*", ""),
            (r"<br>", ""),
            # ----------- emphasis
            # <b><em>
            (r"<b><em>\s*", "``"),
            (r"\s*</b></em>", "``"),
            (r"\s*</em></b>", "``"),
            # <b>
            (r"<b>\s*", "**"),
            (r"\s*</b>", "**"),
            # <code>
            (r"<code>\s*", "``"),
            (r"\s*</code>", "``"),
            # <em>
            (r"'?<em>\s*", "``"),
            (r"\s*</em>'?", "``"),
            # @c <something>
            (r"@c\s(\S+)", r"``\1``"),
            # ----------- todos
            (r"@todo[^\n]*", ""),
            (r"@TODO[^\n]*", ""),
        )
        for pattern, replacement in simple_substitutions:
            s = re.sub(pattern, replacement, s)

        # ----------- code blocks
        s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
        s = re.sub(
            r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL
        )
        s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)

        # ----------- lists
        # Mark every leading "-" with @@@@, re-indent each item up to the
        # next blank line, then turn the markers back into "-" and blank lines.
        s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
        s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
        s = re.sub(r"BBBB@@@@", r"-", s)
        s = re.sub(r"@@@@", r"-", s)
        s = re.sub(r"BBBB", r"\n\n", s)

        # ----------- Cleanup remains
        s = re.sub(r"@cliexend\s*", r"", s)
        return s

    def separate_page_names(self, group):
        """Return the file name of the separate page for ``group``; the
        empty string returned here means no separate page is written."""
        return ""

    def reindent(self, s, indent):
        """Push the given text block ``indent`` spaces right.

        Only lines after the first are shifted: the padding is inserted
        after each newline.
        """
        return s.replace("\n", "\n" + " " * indent)

    def noindent(self, s):
        """Align the given text block left (no indent).

        Strips spaces, tabs, form feeds and vertical tabs that follow each
        newline; the first line is left untouched.
        """
        return re.sub(r"\n[ \f\v\t]*", "\n", s)
class Format(object):
    """Output format class.

    Base for the concrete output formats; subclasses fill in both
    attributes below.
    """

    # Name of this output format
    name = None

    # Expected file extension of templates that build this format
    extension = None
class FormatMarkdown(Format):
    """Markdown output format; its templates use the ``.md`` extension."""

    name = "markdown"
    extension = ".md"


# Register 'markdown' in the module-level format registry
formats[FormatMarkdown.name] = FormatMarkdown
class FormatItemlist(Format):
    """Itemlist output format; its templates use the ``.itemlist``
    extension."""

    name = "itemlist"
    extension = ".itemlist"


# Register 'itemlist' in the module-level format registry
formats[FormatItemlist.name] = FormatItemlist