#!/usr/bin/env python3 # :Author: David Goodger, Günter Milde # Based on the html4css1 writer by David Goodger. # :Maintainer: docutils-develop@lists.sourceforge.net # :Revision: $Revision: 10185 $ # :Date: $Date: 2005-06-28$ # :Copyright: © 2016 David Goodger, Günter Milde # :License: Released under the terms of the `2-Clause BSD license`_, in short: # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. # This file is offered as-is, without any warranty. # # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause """Common definitions for Docutils HTML writers.""" from __future__ import annotations __docformat__ = 'reStructuredText' import base64 import mimetypes import os import os.path import re import warnings import xml.etree.ElementTree as ET from pathlib import Path import docutils from docutils import frontend, languages, nodes, utils, writers from docutils.parsers.rst.directives import length_or_percentage_or_unitless from docutils.parsers.rst.directives.images import PIL from docutils.transforms import writer_aux from docutils.utils.math import (latex2mathml, math2html, tex2mathml_extern, unichar2tex, wrap_math_code, MathError) TYPE_CHECKING = False if TYPE_CHECKING: from docutils.transforms import Transform class Writer(writers.Writer): supported = ('html', 'xhtml') # update in subclass """Formats this writer supports.""" settings_spec = ( 'HTML Writer Options', None, (('Specify the template file (UTF-8 encoded). ' '(default: writer dependent)', ['--template'], {'metavar': ''}), ('Comma separated list of stylesheet URLs. ' 'Overrides previous --stylesheet and --stylesheet-path settings.', ['--stylesheet'], {'metavar': '', 'overrides': 'stylesheet_path', 'validator': frontend.validate_comma_separated_list}), ('Comma separated list of stylesheet paths. ' 'Relative paths are expanded if a matching file is found in ' 'the --stylesheet-dirs. With --link-stylesheet, ' 'the path is rewritten relative to the output HTML file. ' '(default: writer dependent)', ['--stylesheet-path'], {'metavar': '', 'overrides': 'stylesheet', 'validator': frontend.validate_comma_separated_list}), ('Comma-separated list of directories where stylesheets are found. ' 'Used by --stylesheet-path when expanding relative path arguments. ' '(default: writer dependent)', ['--stylesheet-dirs'], {'metavar': '', 'validator': frontend.validate_comma_separated_list}), ('Embed the stylesheet(s) in the output HTML file. The stylesheet ' 'files must be accessible during processing. (default)', ['--embed-stylesheet'], {'default': True, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Link to the stylesheet(s) in the output HTML file. ', ['--link-stylesheet'], {'dest': 'embed_stylesheet', 'action': 'store_false'}), ('Specify the initial header level. ' 'Does not affect document title & subtitle (see --no-doc-title).' '(default: writer dependent).', ['--initial-header-level'], {'choices': '1 2 3 4 5 6'.split(), 'default': '2', 'metavar': ''}), ('Format for footnote references: one of "superscript" or ' '"brackets". (default: "brackets")', ['--footnote-references'], {'choices': ['superscript', 'brackets'], 'default': 'brackets', 'metavar': '', 'overrides': 'trim_footnote_reference_space'}), ('Format for block quote attributions: ' 'one of "dash" (em-dash prefix), "parentheses"/"parens", or "none". ' '(default: "dash")', ['--attribution'], {'choices': ['dash', 'parentheses', 'parens', 'none'], 'default': 'dash', 'metavar': ''}), ('Remove extra vertical whitespace between items of "simple" bullet ' 'lists and enumerated lists. (default)', ['--compact-lists'], {'default': True, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Disable compact simple bullet and enumerated lists.', ['--no-compact-lists'], {'dest': 'compact_lists', 'action': 'store_false'}), ('Remove extra vertical whitespace between items of simple field ' 'lists. (default)', ['--compact-field-lists'], {'default': True, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Disable compact simple field lists.', ['--no-compact-field-lists'], {'dest': 'compact_field_lists', 'action': 'store_false'}), ('Added to standard table classes. ' 'Defined styles: borderless, booktabs, ' 'align-left, align-center, align-right, ' 'colwidths-auto, colwidths-grid.', ['--table-style'], {'default': ''}), ('Math output format (one of "MathML", "HTML", "MathJax", ' 'or "LaTeX") and option(s). (default: "MathML")', ['--math-output'], {'default': 'MathML', 'validator': frontend.validate_math_output}), ('Prepend an XML declaration. ', ['--xml-declaration'], {'default': False, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Omit the XML declaration.', ['--no-xml-declaration'], {'dest': 'xml_declaration', 'action': 'store_false'}), ('Obfuscate email addresses to confuse harvesters while still ' 'keeping email links usable with standards-compliant browsers.', ['--cloak-email-addresses'], {'action': 'store_true', 'validator': frontend.validate_boolean}), ) ) settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'} relative_path_settings = ('template',) config_section = 'html base writer' # overwrite in subclass config_section_dependencies = ('writers', 'html writers') visitor_attributes = ( 'head_prefix', 'head', 'stylesheet', 'body_prefix', 'body_pre_docinfo', 'docinfo', 'body', 'body_suffix', 'title', 'subtitle', 'header', 'footer', 'meta', 'fragment', 'html_prolog', 'html_head', 'html_title', 'html_subtitle', 'html_body') def get_transforms(self) -> list[type[Transform]]: return super().get_transforms() + [writer_aux.Admonitions] def translate(self) -> None: self.visitor = visitor = self.translator_class(self.document) self.document.walkabout(visitor) for attr in self.visitor_attributes: setattr(self, attr, getattr(visitor, attr)) self.output = self.apply_template() def apply_template(self) -> str: template_path = Path(self.document.settings.template) template = template_path.read_text(encoding='utf-8') return template % self.interpolation_dict() def interpolation_dict(self): subs = {} settings = self.document.settings for attr in self.visitor_attributes: subs[attr] = ''.join(getattr(self, attr)).rstrip('\n') subs['encoding'] = settings.output_encoding subs['version'] = docutils.__version__ return subs def assemble_parts(self) -> None: super().assemble_parts() for part in self.visitor_attributes: self.parts[part] = ''.join(getattr(self, part)) class HTMLTranslator(writers.DoctreeTranslator): """ Generic Docutils to HTML translator. See the `html4css1` and `html5_polyglot` writers for full featured HTML translators. .. IMPORTANT:: The `visit_*` and `depart_*` methods use a heterogeneous stack, `self.context`. When subclassing, make sure to be consistent in its use! Examples for robust coding: a) Override both `visit_*` and `depart_*` methods, don't call the parent functions. b) Extend both and unconditionally call the parent functions:: def visit_example(self, node): if foo: self.body.append('
') html4css1.HTMLTranslator.visit_example(self, node) def depart_example(self, node): html4css1.HTMLTranslator.depart_example(self, node) if foo: self.body.append('
') c) Extend both, calling the parent functions under the same conditions:: def visit_example(self, node): if foo: self.body.append('
\n') else: # call the parent method _html_base.HTMLTranslator.visit_example(self, node) def depart_example(self, node): if foo: self.body.append('
\n') else: # call the parent method _html_base.HTMLTranslator.depart_example(self, node) d) Extend one method (call the parent), but don't otherwise use the `self.context` stack:: def depart_example(self, node): _html_base.HTMLTranslator.depart_example(self, node) if foo: # implementation-specific code # that does not use `self.context` self.body.append('\n') This way, changes in stack use will not bite you. """ doctype = '\n' head_prefix_template = ('\n\n') content_type = '\n' generator = ( f'\n') # `starttag()` arguments for the main document (HTML5 uses
) documenttag_args = {'tagname': 'div', 'CLASS': 'document'} # Template for the MathJax script in the header: mathjax_script = '\n' mathjax_url = 'file:/usr/share/javascript/mathjax/MathJax.js' """ URL of the MathJax javascript library. The MathJax library ought to be installed on the same server as the rest of the deployed site files and specified in the `math-output` setting appended to "mathjax". See `Docutils Configuration`__. __ https://docutils.sourceforge.io/docs/user/config.html#math-output The fallback tries a local MathJax installation at ``/usr/share/javascript/mathjax/MathJax.js``. """ stylesheet_link = '\n' embedded_stylesheet = '\n' words_and_spaces = re.compile(r'[^ \n]+| +|\n') # wrap point inside word: in_word_wrap_point = re.compile(r'.+\W\W.+|[-?].+') lang_attribute = 'lang' # name changes to 'xml:lang' in XHTML 1.1 special_characters = {ord('&'): '&', ord('<'): '<', ord('"'): '"', ord('>'): '>', ord('@'): '@', # may thwart address harvesters } """Character references for characters with a special meaning in HTML.""" videotypes = ('video/mp4', 'video/webm', 'video/ogg') """MIME types supported by the HTML5 {suffix}') elif mimetype == 'application/x-shockwave-flash': atts['type'] = mimetype element = (self.starttag(node, 'object', '', data=uri, **atts) + f'{alt}{suffix}') elif element: # embedded SVG, see above element += suffix else: atts['alt'] = alt element = self.emptytag(node, 'img', suffix, src=uri, **atts) self.body.append(element) if suffix: # block-element self.report_messages(node) def depart_image(self, node) -> None: pass def visit_inline(self, node) -> None: self.body.append(self.starttag(node, 'span', '')) def depart_inline(self, node) -> None: self.body.append('') # footnote and citation labels: def visit_label(self, node) -> None: self.body.append('') self.body.append('[') # footnote/citation backrefs: if self.settings.footnote_backlinks: backrefs = node.parent.get('backrefs', []) if len(backrefs) == 1: self.body.append('' % backrefs[0]) def depart_label(self, node) -> None: backrefs = [] if self.settings.footnote_backlinks: backrefs = node.parent.get('backrefs', backrefs) if len(backrefs) == 1: self.body.append('') self.body.append(']\n') if len(backrefs) > 1: backlinks = ['%s' % (ref, i) for (i, ref) in enumerate(backrefs, 1)] self.body.append('(%s)\n' % ','.join(backlinks)) def visit_legend(self, node) -> None: self.body.append(self.starttag(node, 'div', CLASS='legend')) def depart_legend(self, node) -> None: self.body.append('\n') def visit_line(self, node) -> None: self.body.append(self.starttag(node, 'div', suffix='', CLASS='line')) if not len(node): self.body.append('
') def depart_line(self, node) -> None: self.body.append('\n') def visit_line_block(self, node) -> None: self.body.append(self.starttag(node, 'div', CLASS='line-block')) def depart_line_block(self, node) -> None: self.body.append('\n') def visit_list_item(self, node) -> None: self.body.append(self.starttag(node, 'li', '')) def depart_list_item(self, node) -> None: self.body.append('\n') # inline literal def visit_literal(self, node): # special case: "code" role classes = node['classes'] if 'code' in classes: # filter 'code' from class arguments classes.pop(classes.index('code')) self.body.append(self.starttag(node, 'code', '')) return self.body.append( self.starttag(node, 'span', '', CLASS='docutils literal')) text = node.astext() if not isinstance(node.parent, nodes.literal_block): text = text.replace('\n', ' ') # Protect text like ``--an-option`` and the regular expression # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping for token in self.words_and_spaces.findall(text): if token.strip() and self.in_word_wrap_point.search(token): self.body.append('%s' % self.encode(token)) else: self.body.append(self.encode(token)) self.body.append('') raise nodes.SkipNode # content already processed def depart_literal(self, node) -> None: # skipped unless literal element is from "code" role: self.body.append('') def visit_literal_block(self, node) -> None: self.body.append(self.starttag(node, 'pre', '', CLASS='literal-block')) if 'code' in node['classes']: self.body.append('') def depart_literal_block(self, node) -> None: if 'code' in node['classes']: self.body.append('') self.body.append('\n') # Mathematics: # As there is no native HTML math support, we provide alternatives # for the math-output: LaTeX and MathJax simply wrap the content, # HTML and MathML also convert the math_code. # HTML element: math_tags = { # format: (inline, block, [class arguments]) 'html': ('span', 'div', ['formula']), 'latex': ('tt', 'pre', ['math']), 'mathjax': ('span', 'div', ['math']), 'mathml': ('', 'div', []), 'problematic': ('span', 'pre', ['math', 'problematic']), } def visit_math(self, node): # Also called from `visit_math_block()`: is_block = isinstance(node, nodes.math_block) format = self.math_output math_code = node.astext().translate(unichar2tex.uni2tex_table) # preamble code and conversion if format == 'html': if self.math_options and not self.math_header: self.math_header = [ self.stylesheet_call(utils.find_file_in_dirs( s, self.settings.stylesheet_dirs), adjust_path=True) for s in self.math_options.split(',')] math2html.DocumentParameters.displaymode = is_block # TODO: fix display mode in matrices and fractions math_code = wrap_math_code(math_code, is_block) math_code = math2html.math2html(math_code) elif format == 'latex': math_code = self.encode(math_code) elif format == 'mathjax': if not self.math_header: if self.math_options: self.mathjax_url = self.math_options else: self.document.reporter.warning( 'No MathJax URL specified, using local fallback ' '(see config.html).', base_node=node) # append MathJax configuration # (input LaTeX with AMS, output common HTML): if '?' not in self.mathjax_url: self.mathjax_url += '?config=TeX-AMS_CHTML' self.math_header = [self.mathjax_script % self.mathjax_url] if is_block: math_code = wrap_math_code(math_code, is_block) else: math_code = rf'\({math_code}\)' math_code = self.encode(math_code) elif format == 'mathml': if 'XHTML 1' in self.doctype: self.content_type = self.content_type_mathml if self.math_options: converter = getattr(tex2mathml_extern, self.math_options) else: converter = latex2mathml.tex2mathml try: math_code = converter(math_code, as_block=is_block) except (MathError, OSError) as err: details = getattr(err, 'details', []) self.messages.append(self.document.reporter.warning( err, *details, base_node=node)) math_code = self.encode(node.astext()) if self.settings.report_level <= 2: format = 'problematic' else: format = 'latex' if isinstance(err, OSError): # report missing converter only once self.math_output = format # append to document body tag = self.math_tags[format][is_block] suffix = '\n' if is_block else '' if tag: self.body.append(self.starttag(node, tag, suffix=suffix, classes=self.math_tags[format][2])) self.body.extend([math_code, suffix]) if tag: self.body.append(f'{suffix}') # Content already processed: raise nodes.SkipChildren def depart_math(self, node) -> None: pass def visit_math_block(self, node) -> None: self.visit_math(node) def depart_math_block(self, node) -> None: self.report_messages(node) # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1 # HTML5/polyglot recommends using both def visit_meta(self, node) -> None: self.meta.append(self.emptytag(node, 'meta', **node.non_default_attributes())) def depart_meta(self, node) -> None: pass def visit_option(self, node) -> None: self.body.append(self.starttag(node, 'span', '', CLASS='option')) def depart_option(self, node) -> None: self.body.append('') if isinstance(node.next_node(descend=False, siblings=True), nodes.option): self.body.append(', ') def visit_option_argument(self, node) -> None: self.body.append(node.get('delimiter', ' ')) self.body.append(self.starttag(node, 'var', '')) def depart_option_argument(self, node) -> None: self.body.append('') def visit_option_group(self, node) -> None: self.body.append(self.starttag(node, 'dt', '')) self.body.append('') def depart_option_group(self, node) -> None: self.body.append('\n') def visit_option_list(self, node) -> None: self.body.append( self.starttag(node, 'dl', CLASS='option-list')) def depart_option_list(self, node) -> None: self.body.append('\n') def visit_option_list_item(self, node) -> None: pass def depart_option_list_item(self, node) -> None: pass def visit_option_string(self, node) -> None: pass def depart_option_string(self, node) -> None: pass def visit_organization(self, node) -> None: self.visit_docinfo_item(node, 'organization') def depart_organization(self, node) -> None: self.depart_docinfo_item() # Do not omit

tags # -------------------- # # The HTML4CSS1 writer does this to "produce # visually compact lists (less vertical whitespace)". This writer # relies on CSS rules for visual compactness. # # * In XHTML 1.1, e.g., a

element may not contain # character data, so you cannot drop the

tags. # * Keeping simple paragraphs in the field_body enables a CSS # rule to start the field-body on a new line if the label is too long # * it makes the code simpler. # # TODO: omit paragraph tags in simple table cells? def visit_paragraph(self, node) -> None: self.body.append(self.starttag(node, 'p', '')) def depart_paragraph(self, node) -> None: self.body.append('

') if not (isinstance(node.parent, (nodes.list_item, nodes.entry)) and (len(node.parent) == 1)): self.body.append('\n') self.report_messages(node) def visit_problematic(self, node) -> None: if node.hasattr('refid'): self.body.append('' % node['refid']) self.context.append('') else: self.context.append('') self.body.append(self.starttag(node, 'span', '', CLASS='problematic')) def depart_problematic(self, node) -> None: self.body.append('') self.body.append(self.context.pop()) def visit_raw(self, node): if 'html' in node.get('format', '').split(): if isinstance(node.parent, nodes.TextElement): tagname = 'span' else: tagname = 'div' if node['classes']: self.body.append(self.starttag(node, tagname, suffix='')) self.body.append(node.astext()) if node['classes']: self.body.append('' % tagname) # Keep non-HTML raw text out of output: raise nodes.SkipNode def visit_reference(self, node) -> None: atts = {'classes': ['reference']} suffix = '' if 'refuri' in node: atts['href'] = node['refuri'] if (self.settings.cloak_email_addresses and atts['href'].startswith('mailto:')): atts['href'] = self.cloak_mailto(atts['href']) self.in_mailto = True atts['classes'].append('external') else: assert 'refid' in node, \ 'References must have "refuri" or "refid" attribute.' atts['href'] = '#' + node['refid'] atts['classes'].append('internal') if len(node) == 1 and isinstance(node[0], nodes.image): atts['classes'].append('image-reference') if not isinstance(node.parent, nodes.TextElement): suffix = '\n' self.body.append(self.starttag(node, 'a', suffix, **atts)) def depart_reference(self, node) -> None: self.body.append('') if not isinstance(node.parent, nodes.TextElement): self.body.append('\n') self.in_mailto = False def visit_revision(self, node) -> None: self.visit_docinfo_item(node, 'revision', meta=False) def depart_revision(self, node) -> None: self.depart_docinfo_item() def visit_row(self, node) -> None: self.body.append(self.starttag(node, 'tr', '')) node.column = 0 def depart_row(self, node) -> None: self.body.append('\n') def visit_rubric(self, node) -> None: self.body.append(self.starttag(node, 'p', '', CLASS='rubric')) def depart_rubric(self, node) -> None: self.body.append('

\n') def visit_section(self, node) -> None: self.section_level += 1 self.body.append( self.starttag(node, 'div', CLASS='section')) def depart_section(self, node) -> None: self.section_level -= 1 self.body.append('\n') # TODO: use the new HTML5 element \n') def visit_table(self, node) -> None: atts = {'classes': self.settings.table_style.replace(',', ' ').split()} if 'align' in node: atts['classes'].append('align-%s' % node['align']) if 'width' in node: width = node['width'] if width[-1:] in '0123456789.': # unitless value width += 'px' # add default length unit atts['style'] = f'width: {width};' tag = self.starttag(node, 'table', **atts) self.body.append(tag) def depart_table(self, node) -> None: self.body.append('\n') self.report_messages(node) def visit_target(self, node) -> None: if ('refuri' not in node and 'refid' not in node and 'refname' not in node): self.body.append(self.starttag(node, 'span', '', CLASS='target')) self.context.append('') else: self.context.append('') def depart_target(self, node) -> None: self.body.append(self.context.pop()) # no hard-coded vertical alignment in table body def visit_tbody(self, node) -> None: self.body.append(self.starttag(node, 'tbody')) def depart_tbody(self, node) -> None: self.body.append('\n') def visit_term(self, node) -> None: if 'details' in node.parent.parent['classes']: self.body.append(self.starttag(node, 'summary', suffix='')) else: # The parent node (definition_list_item) is omitted in HTML. self.body.append(self.starttag(node, 'dt', suffix='', classes=node.parent['classes'], ids=node.parent['ids'])) def depart_term(self, node) -> None: # Nest (optional) classifier(s) in the
element if node.next_node(nodes.classifier, descend=False, siblings=True): return # skip (depart_classifier() calls this function again) if 'details' in node.parent.parent['classes']: self.body.append('\n') else: self.body.append('
\n') def visit_tgroup(self, node) -> None: self.colspecs = [] node.stubs = [] def depart_tgroup(self, node) -> None: pass def visit_thead(self, node) -> None: self.body.append(self.starttag(node, 'thead')) def depart_thead(self, node) -> None: self.body.append('\n') def section_title_tags(self, node): atts = {} h_level = self.section_level + self.initial_header_level - 1 # Only 6 heading levels have dedicated HTML tags. tagname = 'h%i' % min(h_level, 6) if h_level > 6: atts['aria-level'] = h_level start_tag = self.starttag(node, tagname, '', **atts) if node.hasattr('refid'): atts = {} atts['class'] = 'toc-backref' atts['role'] = 'doc-backlink' # HTML5 only atts['href'] = '#' + node['refid'] start_tag += self.starttag(nodes.reference(), 'a', '', **atts) close_tag = '\n' % tagname else: close_tag = '\n' % tagname return start_tag, close_tag def visit_title(self, node) -> None: close_tag = '

\n' if isinstance(node.parent, nodes.topic): # TODO: use role="heading" or

? (HTML5 only) self.body.append( self.starttag(node, 'p', '', CLASS='topic-title')) if (self.settings.toc_backlinks and 'contents' in node.parent['classes']): self.body.append('') close_tag = '

\n' elif isinstance(node.parent, nodes.sidebar): # TODO: use role="heading" or

? (HTML5 only) self.body.append( self.starttag(node, 'p', '', CLASS='sidebar-title')) elif isinstance(node.parent, nodes.Admonition): self.body.append( self.starttag(node, 'p', '', CLASS='admonition-title')) elif isinstance(node.parent, nodes.table): self.body.append(self.starttag(node, 'caption', '')) close_tag = '\n' elif isinstance(node.parent, nodes.document): self.body.append(self.starttag(node, 'h1', '', CLASS='title')) close_tag = '

\n' self.in_document_title = len(self.body) else: assert isinstance(node.parent, nodes.section) # Get correct heading and evt. backlink tags start_tag, close_tag = self.section_title_tags(node) self.body.append(start_tag) self.context.append(close_tag) def depart_title(self, node) -> None: self.body.append(self.context.pop()) if self.in_document_title: self.title = self.body[self.in_document_title:-1] self.in_document_title = 0 self.body_pre_docinfo.extend(self.body) self.html_title.extend(self.body) del self.body[:] def visit_title_reference(self, node) -> None: self.body.append(self.starttag(node, 'cite', '')) def depart_title_reference(self, node) -> None: self.body.append('') def visit_topic(self, node) -> None: self.body.append(self.starttag(node, 'div', CLASS='topic')) def depart_topic(self, node) -> None: self.body.append('\n') def visit_transition(self, node) -> None: self.body.append(self.emptytag(node, 'hr', CLASS='docutils')) def depart_transition(self, node) -> None: pass def visit_version(self, node) -> None: self.visit_docinfo_item(node, 'version', meta=False) def depart_version(self, node) -> None: self.depart_docinfo_item() def unimplemented_visit(self, node): raise NotImplementedError('visiting unimplemented node type: %s' % node.__class__.__name__) class SimpleListChecker(nodes.GenericNodeVisitor): """ Raise `nodes.NodeFound` if non-simple list item is encountered. Here "simple" means a list item containing nothing other than a single paragraph, a simple list, or a paragraph followed by a simple list. This version also checks for simple field lists and docinfo. """ def default_visit(self, node): raise nodes.NodeFound def default_departure(self, node): pass def visit_list_item(self, node): children = [child for child in node.children if not isinstance(child, nodes.Invisible)] if (children and isinstance(children[0], nodes.paragraph) and (isinstance(children[-1], nodes.bullet_list) or isinstance(children[-1], nodes.enumerated_list) or isinstance(children[-1], nodes.field_list))): children.pop() if len(children) <= 1: return else: raise nodes.NodeFound def pass_node(self, node) -> None: pass def ignore_node(self, node): # ignore nodes that are never complex (can contain only inline nodes) raise nodes.SkipNode # Paragraphs and text visit_Text = ignore_node visit_paragraph = ignore_node # Lists visit_bullet_list = pass_node visit_enumerated_list = pass_node visit_docinfo = pass_node # Docinfo nodes: visit_author = ignore_node visit_authors = visit_list_item visit_address = visit_list_item visit_contact = pass_node visit_copyright = ignore_node visit_date = ignore_node visit_organization = ignore_node visit_status = ignore_node visit_version = visit_list_item # Definition list: visit_definition_list = pass_node visit_definition_list_item = pass_node visit_term = ignore_node visit_classifier = pass_node visit_definition = visit_list_item # Field list: visit_field_list = pass_node visit_field = pass_node # the field body corresponds to a list item visit_field_body = visit_list_item visit_field_name = ignore_node # Invisible nodes should be ignored. visit_comment = ignore_node visit_substitution_definition = ignore_node visit_target = ignore_node visit_pending = ignore_node