# $Id: docutils_xml.py 9810 2024-08-01 07:22:07Z aa-turner $
# Author: David Goodger, Paul Tremblay, Guenter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.

"""
Simple document tree Writer, writes Docutils XML according to
https://docutils.sourceforge.io/docs/ref/docutils.dtd.
"""

__docformat__ = 'reStructuredText'

from io import StringIO
import xml.sax.saxutils

import docutils
from docutils import frontend, nodes, writers, utils


class RawXmlError(docutils.ApplicationError):
    """Error class for problems with raw XML content."""


class Writer(writers.Writer):
    """Writer that serializes a document tree as Docutils XML."""

    supported = ('xml',)
    """Formats this writer supports."""

    settings_spec = (
        '"Docutils XML" Writer Options',
        None,
        (('Generate XML with newlines before and after tags.',
          ['--newlines'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Generate XML with indents and newlines.',
          ['--indents'],  # TODO use integer value for number of spaces?
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Omit the XML declaration. Use with caution.',
          ['--no-xml-declaration'],
          {'dest': 'xml_declaration', 'default': 1, 'action': 'store_false',
           'validator': frontend.validate_boolean}),
         ('Omit the DOCTYPE declaration.',
          ['--no-doctype'],
          {'dest': 'doctype_declaration', 'default': 1,
           'action': 'store_false', 'validator': frontend.validate_boolean}),))

    settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}

    config_section = 'docutils_xml writer'
    config_section_dependencies = ('writers',)

    output = None
    """Final translated form of `document`."""

    def __init__(self) -> None:
        super().__init__()
        self.translator_class = XMLTranslator

    def translate(self) -> None:
        """Walk `self.document` with the translator and store the result.

        The translator instance is kept in `self.visitor`; the joined
        output fragments are stored in `self.output`.
        """
        self.visitor = visitor = self.translator_class(self.document)
        self.document.walkabout(visitor)
        self.output = ''.join(visitor.output)


class XMLTranslator(nodes.GenericNodeVisitor):
    """Node visitor collecting XML text fragments in `self.output`."""

    # TODO: add stylesheet options similar to HTML and LaTeX writers?
    # xml_stylesheet = '<?xml-stylesheet type="text/xsl" href="%s"?>\n'
    doctype = (
        '<!DOCTYPE document PUBLIC'
        ' "+//IDN docutils.sourceforge.net//DTD Docutils Generic//EN//XML"'
        ' "https://docutils.sourceforge.io/docs/ref/docutils.dtd">\n')
    # Must contain exactly one ``%s``: it is interpolated with the
    # Docutils version in `__init__`.
    generator = '<!-- Generated by Docutils %s -->\n'

    xmlparser = xml.sax.make_parser()
    """SAX parser instance to check/extract raw XML."""
    # NOTE(review): this lets the parser resolve external general entities
    # referenced from raw XML content.  With untrusted input documents this
    # may expose local files or trigger network access -- confirm that this
    # is intended before processing untrusted sources.
    xmlparser.setFeature(
        "http://xml.org/sax/features/external-general-entities", True)

    def __init__(self, document) -> None:
        super().__init__(document)

        # Reporter
        self.warn = self.document.reporter.warning
        self.error = self.document.reporter.error

        # Settings
        self.settings = settings = document.settings
        self.indent = self.newline = ''
        if settings.newlines:
            self.newline = '\n'
        if settings.indents:
            self.newline = '\n'
            self.indent = ' '  # TODO make this configurable?
        self.level = 0  # indentation level
        self.in_simple = 0  # level of nesting inside mixed-content elements
        self.fixed_text = 0  # level of nesting inside FixedText elements

        # Output
        self.output = []
        if settings.xml_declaration:
            self.output.append(utils.xml_declaration(settings.output_encoding))
        if settings.doctype_declaration:
            self.output.append(self.doctype)
        self.output.append(self.generator % docutils.__version__)

        # initialize XML parser
        self.the_handle = TestXml()
        # The parser is a class attribute shared by all instances;
        # the most recently created translator's handler is the active one.
        self.xmlparser.setContentHandler(self.the_handle)

    # generic visit and depart methods
    # --------------------------------

    simple_nodes = (nodes.TextElement, nodes.meta,
                    nodes.image, nodes.colspec, nodes.transition)

    def default_visit(self, node) -> None:
        """Default node visit method."""
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.starttag(xml.sax.saxutils.quoteattr))
        if not isinstance(node, nodes.Inline):
            self.level += 1
        # `nodes.literal` is not an instance of FixedTextElement by design,
        # see docs/ref/rst/restructuredtext.html#inline-literals
        if isinstance(node, (nodes.FixedTextElement, nodes.literal)):
            self.fixed_text += 1
        if isinstance(node, self.simple_nodes):
            self.in_simple += 1
        if not self.in_simple:
            self.output.append(self.newline)

    def default_departure(self, node) -> None:
        """Default node depart method."""
        if not isinstance(node, nodes.Inline):
            self.level -= 1
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.endtag())
        if isinstance(node, (nodes.FixedTextElement, nodes.literal)):
            self.fixed_text -= 1
        if isinstance(node, self.simple_nodes):
            self.in_simple -= 1
        if not self.in_simple:
            self.output.append(self.newline)

    # specific visit and depart methods
    # ---------------------------------

    def visit_Text(self, node) -> None:
        """Append the XML-escaped text of `node` to the output."""
        text = xml.sax.saxutils.escape(node.astext())
        # indent text if we are not in a FixedText element:
        if not self.fixed_text:
            text = text.replace('\n', '\n'+self.indent*self.level)
        self.output.append(text)

    def depart_Text(self, node) -> None:
        pass

    def visit_raw(self, node):
        """Write a raw node; pass XML content through unescaped.

        Raw content in a format other than "xml" is handled like any
        other element.  Raw XML is inserted verbatim (wrapped in the
        element's tags) and then checked with the SAX parser; a warning
        is reported if it cannot be parsed.

        Raises `nodes.SkipNode` for raw XML because the content has
        already been written.
        """
        if 'xml' not in node.get('format', '').split():
            # skip other raw content?
            # raise nodes.SkipNode
            self.default_visit(node)
            return

        # wrap in <raw> element
        self.default_visit(node)  # or not?
        xml_string = node.astext()
        self.output.append(xml_string)
        self.default_departure(node)  # or not?
        # Check validity of raw XML:
        try:
            self.xmlparser.parse(StringIO(xml_string))
        except xml.sax.SAXParseException:
            col_num = self.the_handle.locator.getColumnNumber()
            line_num = self.the_handle.locator.getLineNumber()
            msg = 'Invalid raw XML in column %d, line offset %d:\n%s' % (
                col_num, line_num, xml_string)
            location = {'source': node.source}
            # `node.line` may be None (e.g. for nodes without source
            # information); only report a line number when it is known.
            if node.line is not None:
                srcline = node.line
                if not isinstance(node.parent, nodes.TextElement):
                    srcline += 2  # directive content start line
                location['line'] = srcline + line_num - 1
            self.warn(msg, **location)
        raise nodes.SkipNode  # content already processed


class TestXml(xml.sax.handler.ContentHandler):
    """Content handler that only stores the parser's document locator.

    The locator is read in `XMLTranslator.visit_raw` to report the
    position of a parse error.
    """

    def setDocumentLocator(self, locator) -> None:
        self.locator = locator