# :Id: $Id: mathml_elements.py 9810 2024-08-01 07:22:07Z aa-turner $
# :Copyright: 2024 Günter Milde.
#
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
"""MathML element classes based on `xml.etree`.
The module is intended for programmatic generation of MathML
and covers the part of `MathML Core`_ that is required by
Docutils' *TeX math to MathML* converter.
This module is PROVISIONAL:
the API is not settled and may change with any minor Docutils version.
.. _MathML Core: https://www.w3.org/TR/mathml-core/
"""
# Usage:
#
# >>> from mathml_elements import *
import numbers
import xml.etree.ElementTree as ET
GLOBAL_ATTRIBUTES = (
    # Attribute name    # Meaning
    'class',            # element classes (space-separated list)
    # 'data-*',         # custom data attributes (as in HTML)
    'dir',              # text direction: 'ltr' or 'rtl'
    'displaystyle',     # True -> normal layout, False -> compact layout
    'id',               # unique identifier of the element
    # 'mathbackground', # colour definition (deprecated)
    # 'mathcolor',      # colour definition (deprecated)
    # 'mathsize',       # font size (deprecated)
    'nonce',            # cryptographic nonce ("number used once")
    'scriptlevel',      # math-depth of the element
    'style',            # CSS styling declarations
    'tabindex',         # whether the element takes input focus
)
"""Names of the global MathML attributes.

Entries that are commented out are listed for reference only.

https://w3c.github.io/mathml-core/#global-attributes
"""
# Base classes
# ------------
class MathElement(ET.Element):
    """Base class for MathML elements.

    Extends `xml.etree.ElementTree.Element` with

    * a `parent` back-reference that is maintained by `append()`,
      `extend()` and item assignment,
    * an optional limit on the number of children (`nchildren`),
    * conversion of attribute values to strings, and
    * XML serialisation helpers.

    The element's tag is the name of the (sub)class.
    """

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML element tree."""

    def __init__(self, *children, **attributes) -> None:
        """Set up node with `children` and `attributes`.

        Attribute names are normalised to lowercase.
        You may use "CLASS" to set a "class" attribute.
        Attribute values are converted to strings
        (with True -> "true" and False -> "false").

        >>> math(CLASS='test', level=3, split=True)
        math(class='test', level='3', split='true')
        >>> math(CLASS='test', level=3, split=True).toxml()
        '<math class="test" level="3" split="true"></math>'

        """
        attrib = {k.lower(): self.a_str(v) for k, v in attributes.items()}
        super().__init__(self.__class__.__name__, **attrib)
        self.extend(children)

    @staticmethod
    def a_str(v):
        """Return string representation for attribute value `v`.

        Booleans become "true"/"false", anything else is passed to `str()`.
        """
        if isinstance(v, bool):
            return str(v).lower()
        return str(v)

    def __repr__(self) -> str:
        """Return full string representation."""
        args = [repr(child) for child in self]
        if self.text:
            args.append(repr(self.text))
        # Only show `nchildren` if it differs from the class default
        # (e.g. after `close()` has been called on the instance).
        if self.nchildren != self.__class__.nchildren:
            args.append(f'nchildren={self.nchildren}')
        # `switch` is not defined in this class; report it if present.
        if getattr(self, 'switch', None):
            args.append('switch=True')
        args += [f'{k}={v!r}' for k, v in self.items() if v is not None]
        return f'{self.tag}({", ".join(args)})'

    def __str__(self) -> str:
        """Return concise, informal string representation."""
        if self.text:
            args = repr(self.text)
        else:
            args = ', '.join(f'{child}' for child in self)
        return f'{self.tag}({args})'

    def set(self, key, value) -> None:
        """Set attribute `key` to the string representation of `value`."""
        super().set(key, self.a_str(value))

    def __setitem__(self, key, value) -> None:
        """Replace child (or slice of children) and update `parent` links.

        Raise TypeError if the element does not take children or
        if it would hold more than `nchildren` children.
        """
        if self.nchildren == 0:
            raise TypeError(f'Element "{self}" does not take children.')
        if isinstance(value, MathElement):
            value.parent = self
        else:  # value may be an iterable
            if self.nchildren and len(self) + len(value) > self.nchildren:
                raise TypeError(f'Element "{self}" takes only {self.nchildren}'
                                ' children')
            for e in value:
                e.parent = self
        super().__setitem__(key, value)

    def is_full(self):
        """Return True if no more children may be appended.

        An element without child limit (``nchildren is None``) is never full.
        """
        return self.nchildren is not None and len(self) >= self.nchildren

    def close(self):
        """Close element and return first non-full ancestor or None."""
        self.nchildren = len(self)  # mark node as full
        parent = self.parent
        while parent is not None and parent.is_full():
            parent = parent.parent
        return parent

    def append(self, element):
        """Append `element` and return new "current node" (insertion point).

        Append as child element and set the internal `parent` attribute.

        If self is already full, raise TypeError.

        If self is full after appending, call `self.close()`
        (returns first non-full ancestor or None) else return `self`.
        """
        if self.is_full():
            if self.nchildren:
                status = f'takes only {self.nchildren} children'
            else:
                status = 'does not take children'
            raise TypeError(f'Element "{self}" {status}.')
        super().append(element)
        element.parent = self
        if self.is_full():
            return self.close()
        return self

    def extend(self, elements):
        """Sequentially append `elements`. Return new "current node".

        All elements are appended to `self`; the return value is the
        result of the last `append()` call (or `self` if `elements` is empty).

        Raise TypeError if overfull.
        """
        current_node = self
        for element in elements:
            current_node = self.append(element)
        return current_node

    def pop(self, index=-1):
        """Remove and return the child at `index` (default: last child)."""
        element = self[index]
        del self[index]
        return element

    def in_block(self):
        """Return True, if `self` or an ancestor has ``display='block'``.

        Used to find out whether we are in inline vs. displayed maths.
        """
        if self.get('display') is None:
            try:
                return self.parent.in_block()
            except AttributeError:  # no parent: root without "display"
                return False
        return self.get('display') == 'block'

    # XML output:

    def indent_xml(self, space=' ', level=0) -> None:
        """Format XML output with indents.

        Use with care:
          Formatting whitespace is permanently added to the
          `text` and `tail` attributes of `self` and its descendants!
        """
        ET.indent(self, space, level)

    def unindent_xml(self) -> None:
        """Strip whitespace at the end of `text` and `tail` attributes...

        to revert changes made by the `indent_xml()` method.
        Use with care, trailing whitespace from the original may be lost.
        """
        for e in self.iter():
            # The text of `MathToken` instances (class defined elsewhere
            # in this module) is left untouched.
            if not isinstance(e, MathToken) and e.text:
                e.text = e.text.rstrip()
            if e.tail:
                e.tail = e.tail.rstrip()

    def toxml(self, encoding=None):
        """Return an XML representation of the element.

        By default, the return value is a `str` instance. With an explicit
        `encoding` argument, the result is a `bytes` instance in the
        specified encoding. The XML default encoding is UTF-8, any other
        encoding must be specified in an XML document header.

        The invisible "Apply Function" character (U+2061) is written
        as the named entity ``&ApplyFunction;``.

        Name and encoding handling match `xml.dom.minidom.Node.toxml()`
        while `etree.Element.tostring()` returns `bytes` by default.
        """
        xml = ET.tostring(self, encoding or 'unicode',
                          short_empty_elements=False)
        # Visible representation for "Apply Function" character:
        if isinstance(xml, str):
            return xml.replace('\u2061', '&ApplyFunction;')
        # etree serialises with the "xmlcharrefreplace" error handler, so
        # in encodings that cannot represent U+2061 (e.g. "ascii"),
        # the character shows up as numeric character reference.
        # Encoding the search pattern the same way matches both cases.
        apply_function = '\u2061'.encode(encoding, 'xmlcharrefreplace')
        return xml.replace(apply_function, b'&ApplyFunction;')
# Group sub-expressions in a horizontal row
#
# The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>,
# <menclose>, <mtd>, <mscarries>, and <math>