#!/usr/bin/env python3
# :Copyright: © 2024 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause

"""Tests for parsers/docutils_xml.py."""

from pathlib import Path
import sys
import unittest
import xml.etree.ElementTree as ET

if __name__ == '__main__':
    # prepend the "docutils root" to the Python library path
    # so we import the local `docutils` package.
    sys.path.insert(0, str(Path(__file__).resolve().parents[3]))

from docutils import frontend, utils
from docutils.parsers import docutils_xml

# NOTE(review): Many ``xml`` literals in this module are empty or hold only
# character data, although the expected results name elements and
# attributes.  Presumably the XML markup was stripped from this copy of the
# file -- restore the literals from version control before relying on the
# test results.  The literal values are kept exactly as found.


class ParseElementTestCase(unittest.TestCase):
    """Test the `docutils.parsers.docutils_xml.parse_element()` function."""
    maxDiff = None

    # suppress warnings when passing `document` to `parse_element()`
    # (class attributes: one settings object and one document are shared
    # by all test methods that pass `self.document`)
    settings = frontend.get_default_settings(docutils_xml.Parser)
    settings.warning_stream = ''  # comment out to see warnings
    document = utils.new_document('xml input', settings)

    def test_element_with_child_with_text(self):
        # the string representation of the parsed node equals the input
        xml = 'some text'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))

    def test_tailing_text_after_root(self):
        """etree.ElementTree does not accept trailing text in the input.
        """
        xml = 'texttailing text'
        # without a document, the ElementTree error propagates
        with self.assertRaisesRegex(ET.ParseError, 'junk after document '):
            docutils_xml.parse_element(xml)
        # If a document is provided, report via a "loose" error system message
        # (comment out ``settings.warning_stream = ''`` above to see it).
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual('text', str(node))

    def test_nothing_but_junk_text(self):
        xml = 'just text'
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual(node.astext(), 'No XML element found.')

    def test_nonexistent_element_type(self):
        # NOTE(review): this literal spanned several physical lines in this
        # copy of the file; it is written with escapes so that the module
        # compiles.  The value is unchanged.
        xml = '\n\nsome text\n\n'
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual(xml, str(node))
        # see test_misc.py for the warning

    def test_junk_text(self):
        # insert text also in nodes that are not TextElement instances
        xml = 'some text'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        # parsing accepts the text, validation rejects it
        with self.assertRaisesRegex(ValueError,
                                    'Expecting child of type ,'
                                    ' not text data "some text"'):
            node.validate()

    def test_tailing_junk_text(self):
        # insert text also in nodes that are not TextElement instances
        xml = 'some texttailing text'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        with self.assertRaisesRegex(
                ValueError, 'Spurious text: "tailing text"'):
            node.validate()

    def test_element_with_attributes(self):
        xml = 'a barking dog'
        node = docutils_xml.parse_element(xml)
        # attribute values are normalized:
        self.assertEqual(xml.replace('4.50 cm', '4.5cm'), str(node))

    def test_element_with_invalid_attributes(self):
        """Silently accept invalid attribute names and values.

        Validation reports problems.
        """
        xml = ''
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        with self.assertRaisesRegex(
                ValueError,
                'Element invalid:\n'
                ' Attribute "breadth" not one of "ids", .*, "loading".\n'
                ' Attribute "height" has invalid value "three inch".\n'
                ' "three inch" is no valid measure.'):
            node.validate()


class XmlAttributesTestCase(unittest.TestCase):
    """
    Test correct parsing of the `supported element attributes`__.

    See also `AttributeTypeTests` in ../../test_nodes.py.

    __ https://docutils.sourceforge.io/
       docs/ref/doctree.html#attribute-reference
    """
    # Baseline merged (dict union) with each test's `expected` mapping
    # before comparing with `node.attributes`.
    common_attributes = {'classes': [], 'dupnames': [],
                         'ids': [], 'names': []}

    # The trailing comment on each ``def`` line names the attribute type as
    # "doctree type (Python type)".  Comments of the form
    # ``'name': type → test_x`` point to the test that also covers 'name'.

    def test_alt(self):  # CDATA (str)
        xml = 'a barking dog'
        expected = {'alt': 'a barking dog', 'align': 'left',
                    'height': '3ex', 'loading': 'embed', 'scale': 3,
                    'uri': 'dog.jpg', 'width': '4cm'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'align': CDATA (str) → test_alt

    def test_anonymous(self):  # yesorno (int)
        xml = ''
        expected = {'anonymous': 1,
                    'ids': ['target-1'],
                    'refuri': 'example.html'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_auto(self):  # CDATA (str)
        # also encodes footnote label type: '1': numbered, '*': symbols
        xml = ''
        expected = {'auto': '*',
                    'backrefs': ['footnote-reference-2']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'backrefs': idrefs.type (list[str]) → test_auto

    def test_bullet(self):  # CDATA (str)
        xml = ''
        expected = {'bullet': '*',
                    'classes': ['first', 'x-2nd']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'classes': classnames.type (list[str]) → test_bullet

    def test_colwidth(self):  # CDATA (int)
        # Provisional. Currently, Docutils handles "colwidth" differently
        # from the Exchange Table Model. This will eventually change
        # (see https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth).
        xml = ''
        expected = {'colwidth': 33, 'stub': 1}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)
        # Note: the upstream default unit is "pt", not "*".
        xml = ''
        expected = {'colwidth': 33, 'stub': 1}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_delimiter(self):  # CDATA (str)
        xml = 'FILE'
        expected = {'delimiter': '='}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_dupnames(self):  # refnames.type (list[str]).
        # NOTE(review): this was a raw-string literal broken across two
        # physical lines in this copy of the file; it is written with an
        # escape so that the module compiles.  The value is unchanged.
        xml = '\n'
        expected = {'dupnames': ['title 1'],
                    'ids': ['title-1']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_enumtype(self):  # EnumeratedType (str)
        xml = ''
        expected = {'enumtype': 'upperroman',
                    'prefix': '(',
                    'start': 2,
                    'suffix': ')'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_format(self):  # NMTOKENS (str) (space-delimited list of keywords)
        xml = ''
        expected = {'format': 'html latex',
                    'xml:space': 'preserve'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'height': measure (str) → test_alt
    # 'ids': ids.type (list[str]) → test_names

    def test_level(self):  # level (int)
        xml = ''
        expected = {'backrefs': [],
                    'level': 3,
                    'line': 21,
                    'source': 'string',
                    'type': 'ERROR'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_ltrim(self):  # yesorno (int)
        xml = ''
        expected = {'ltrim': 1, 'names': ['nbsp']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'loading': EnumeratedType (str) → test_alt

    def test_morecols(self):  # number (int)
        xml = ''
        expected = {'morecols': 1}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_names(self):  # refnames.type (list[str])
        # internal whitespace in XML escaped
        # NOTE(review): this was a raw-string literal broken across two
        # physical lines in this copy of the file; it is written with an
        # escape so that the module compiles.  The value is unchanged.
        xml = '\n'
        expected = {'ids': ['title-2', 'title-1'],
                    'names': ['title 2\\', 'title 1']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'prefix': CDATA (str) → test_enumtype

    def test_refid(self):  # idref.type (str)
        xml = ''
        expected = {'refid': 'title-1-1'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    def test_refname(self):  # refname.type (str)
        xml = ''
        expected = {'refname': 'title 2'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'refuri': CDATA (str) → test_anonymous

    def test_rtrim(self):  # yesorno (int)
        # NOTE(review): `expected` is identical to the one in `test_ltrim`
        # and never mentions "rtrim" -- this looks like a copy-paste slip.
        # Confirm against the XML input before changing it.
        xml = ''
        expected = {'ltrim': 1, 'names': ['nbsp']}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'scale': number (int) → test_alt
    # 'source': CDATA (str) → test_title
    # 'start': number (int) → test_enumtype
    # 'stub': yesorno (int) → test_colwidth
    # 'suffix': CDATA (str) → test_enumtype

    def test_title(self):  # CDATA (str)
        xml = r''
        expected = {'ids': ['test-document'],
                    'names': ['test document'],
                    'source': '/tmp/foo.rst',
                    'title': 'Test Document'}
        node = docutils_xml.parse_element(xml)
        self.assertEqual(node.attributes, self.common_attributes | expected)

    # 'uri': CDATA (str) → test_alt
    # 'width': measure (str) → test_alt
    # 'xml:space': EnumeratedType (str) → test_format


if __name__ == '__main__':
    unittest.main()