#!/usr/bin/env python3
# :Copyright: © 2024 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
"""Tests for parsers/docutils_xml.py."""
from pathlib import Path
import sys
import unittest
import xml.etree.ElementTree as ET
if __name__ == '__main__':
    # Script mode only: put the "docutils root" (three levels above this
    # file's directory) first on the module search path, so that the local
    # `docutils` package is the one that gets imported.
    sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
from docutils import frontend, utils
from docutils.parsers import docutils_xml
class ParseElementTestCase(unittest.TestCase):
    """Test the `docutils.parsers.docutils_xml.parse_element()` function.

    NOTE(review): the XML markup inside the input literals and expected
    messages was missing from the reviewed text (two literals were even
    unterminated).  It is restored here from what each test asserts;
    the element names are assumptions -- confirm against the project
    history.
    """

    maxDiff = None

    # Suppress warnings when passing `document` to `parse_element()`.
    settings = frontend.get_default_settings(docutils_xml.Parser)
    settings.warning_stream = ''  # comment out to see warnings
    document = utils.new_document('xml input', settings)

    def test_element_with_child_with_text(self):
        """An element with a text-bearing child round-trips unchanged."""
        xml = '<tip><paragraph>some text</paragraph></tip>'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))

    def test_tailing_text_after_root(self):
        """etree.ElementTree does not accept trailing text in the input."""
        xml = '<strong>text</strong>tailing text'
        with self.assertRaisesRegex(ET.ParseError, 'junk after document '):
            docutils_xml.parse_element(xml)
        # If a document is provided, report via a "loose" error system
        # message (comment out ``settings.warning_stream = ''`` above
        # to see it).
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual('<strong>text</strong>', str(node))

    def test_nothing_but_junk_text(self):
        """Input without any XML element yields a node with an error text."""
        xml = 'just text'
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual(node.astext(), 'No XML element found.')

    def test_nonexistent_element_type(self):
        """An unknown element type is kept and round-trips unchanged."""
        xml = '<tip><p>some text</p></tip>'
        node = docutils_xml.parse_element(xml, self.document)
        self.assertEqual(xml, str(node))
        # see test_misc.py for the warning

    def test_junk_text(self):
        """Text directly inside a non-text element is kept by the parser.

        Validation reports it.
        """
        # insert text also in nodes that are not TextElement instances
        xml = '<tip>some text</tip>'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        with self.assertRaisesRegex(
                ValueError,
                'Expecting child of type <Body>,'
                ' not text data "some text"'):
            node.validate()

    def test_tailing_junk_text(self):
        """Text following a child of a non-text element is kept, too.

        Validation reports it.
        """
        # insert text also in nodes that are not TextElement instances
        xml = '<tip><paragraph>some text</paragraph>tailing text</tip>'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        with self.assertRaisesRegex(
                ValueError, 'Spurious text: "tailing text"'):
            node.validate()

    def test_element_with_attributes(self):
        """Attributes are parsed; their values are normalized."""
        xml = ('<image align="left" alt="a barking dog" height="3ex"'
               ' loading="embed" scale="3" uri="dog.jpg" width="4.50 cm"/>')
        node = docutils_xml.parse_element(xml)
        # attribute values are normalized:
        self.assertEqual(xml.replace('4.50 cm', '4.5cm'), str(node))

    def test_element_with_invalid_attributes(self):
        """Silently accept invalid attribute names and values.

        Validation reports problems.
        """
        xml = '<image breadth="3 cm" height="three inch"/>'
        node = docutils_xml.parse_element(xml)
        self.assertEqual(xml, str(node))
        # The pattern accepts any element representation in the first line
        # and any indentation width of the detail lines.
        with self.assertRaisesRegex(
                ValueError,
                'Element .*invalid:\n'
                ' +Attribute "breadth" not one of "ids", .*, "loading".\n'
                ' +Attribute "height" has invalid value "three inch".\n'
                ' +"three inch" is no valid measure.'):
            node.validate()
class XmlAttributesTestCase(unittest.TestCase):
"""
Test correct parsing of the `supported element attributes`_.
See also `AttributeTypeTests` in ../../test_nodes.py.
__ https://docutils.sourceforge.io/
docs/ref/doctree.html#attribute-reference
"""
common_attributes = {'classes': [],
'dupnames': [],
'ids': [],
'names': []}
def test_alt(self): # CDATA (str)
xml = ('')
expected = {'alt': 'a barking dog',
'align': 'left',
'height': '3ex',
'loading': 'embed',
'scale': 3,
'uri': 'dog.jpg',
'width': '4cm'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'align': CDATA (str) → test_alt
def test_anonymous(self): # yesorno (int)
xml = ''
expected = {'anonymous': 1,
'ids': ['target-1'],
'refuri': 'example.html'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_auto(self): # CDATA (str)
# also encodes footnote label type: '1': numbered, '*': symbols
xml = ''
expected = {'auto': '*',
'backrefs': ['footnote-reference-2']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'backrefs': idrefs.type (list[str]) → test_auto
def test_bullet(self): # CDATA (str)
xml = ''
expected = {'bullet': '*',
'classes': ['first', 'x-2nd']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'classes': classnames.type (list[str]) → test_bullet
def test_colwidth(self): # CDATA (int)
# Provisional. Currently, Docutils handles "colwidth" differently
# from the Exchange Table Model. This will eventually change
# (see https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth).
xml = ''
expected = {'colwidth': 33, 'stub': 1}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# Note: the upstream default unit is "pt", not "*".
xml = ''
expected = {'colwidth': 33, 'stub': 1}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_delimiter(self): # CDATA (str)
xml = 'FILE'
expected = {'delimiter': '='}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_dupnames(self): # refnames.type (list[str]).
xml = r''
expected = {'dupnames': ['title 1'],
'ids': ['title-1']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_enumtype(self): # EnumeratedType (str)
xml = ('')
expected = {'enumtype': 'upperroman',
'prefix': '(',
'start': 2,
'suffix': ')'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_format(self): # NMTOKENS (str) (space-delimited list of keywords)
xml = ''
expected = {'format': 'html latex',
'xml:space': 'preserve'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'height': measure (str) → test_alt
# 'ids': ids.type (list[str]) → test_names
def test_level(self): # level (int)
xml = ('')
expected = {'backrefs': [],
'level': 3,
'line': 21,
'source': 'string',
'type': 'ERROR'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_ltrim(self): # yesorno (int)
xml = ''
expected = {'ltrim': 1, 'names': ['nbsp']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'loading': EnumeratedType (str) → test_alt
def test_morecols(self): # number (int)
xml = ''
expected = {'morecols': 1}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_names(self): # refnames.type (list[str])
# internal whitespace in XML escaped
xml = r''
expected = {'ids': ['title-2', 'title-1'],
'names': ['title 2\\', 'title 1']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'prefix': CDATA (str) → test_enumtype
def test_refid(self): # idref.type (str)
xml = ''
expected = {'refid': 'title-1-1'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
def test_refname(self): # refname.type (str)
xml = ''
expected = {'refname': 'title 2'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'refuri: CDATA (str) → test_anonymous
def test_rtrim(self): # yesorno (int)
xml = ''
expected = {'ltrim': 1,
'names': ['nbsp']}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'scale': number (int) → test_alt
# 'source': CDATA (str) → test_title
# 'start': number (int) → test_enumtype
# 'stub': yesorno (int) → test_colwidth
# 'suffix': CDATA (str) → test_enumtype
def test_title(self): # CDATA (str)
xml = (r'')
expected = {'ids': ['test-document'],
'names': ['test document'],
'source': '/tmp/foo.rst',
'title': 'Test Document'}
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
# 'uri': CDATA (str) → test_alt
# 'width' measure (str) → test_alt
# 'xml:space' EnumeratedType (str) → test_format
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest command line
    # runner.
    unittest.main()