#! /usr/bin/env python3

# $Id: test_unicode.py 9425 2023-06-30 14:56:47Z milde $
# Author: David Goodger
# Copyright: This module has been placed in the public domain.

"""
Tests for misc.py "unicode" directive.
"""

from pathlib import Path
import sys
import unittest

if __name__ == '__main__':
    # prepend the "docutils root" to the Python library path
    # so we import the local `docutils` package.
    sys.path.insert(0, str(Path(__file__).resolve().parents[4]))

from docutils.frontend import get_default_settings
from docutils.parsers.rst import Parser
from docutils.utils import new_document


class ParserTestCase(unittest.TestCase):
    """Parse every sample in `totest` and compare the pseudo-XML output."""

    def test_parser(self):
        # One parser and one settings object are shared by all samples;
        # every sample gets its own copy of the settings and a new document.
        parser = Parser()
        settings = get_default_settings(Parser)
        settings.warning_stream = ''
        for name, cases in totest.items():
            for casenum, (case_input, case_expected) in enumerate(cases):
                # The subTest id names the failing sample,
                # e.g. "totest['unicode'][2]".
                with self.subTest(id=f'totest[{name!r}][{casenum}]'):
                    document = new_document('test data', settings.copy())
                    parser.parse(case_input, document)
                    output = document.pformat()
                    self.assertEqual(case_expected, output)


# Capture the interpreter's own wording for an out-of-range `chr()` argument.
# Both strings are interpolated into the expected output of the last sample,
# so that sample does not hard-code an interpreter-specific message.
try:
    chr(0x111111111111111111)
except OverflowError as unichr_exception:
    unichr_exception_string = f'code too large ({unichr_exception})'
except Exception as unichr_exception:
    unichr_exception_string = str(unichr_exception)
else:
    unichr_exception_string = ''

try:
    chr(0x11111111)
except Exception as detail:
    invalid_char_code = f'{detail.__class__.__name__}: {detail}'
else:
    invalid_char_code = ''

# Maps a group name to a list of [input, expected pseudo-XML] pairs.
totest = {}

# NOTE(review): this file arrived with its line structure collapsed and the
# pseudo-XML element markup missing from the expected strings (only the text
# content was left).  Line breaks and markup below are reconstructed to match
# the format of `document.pformat()`; the attribute values (message levels,
# line numbers, escaped names) should be confirmed by running the test.
# The |nbsp| sample is written as the XML-style character entity because a
# literal no-break space is not a character code.
totest['unicode'] = [
["""
Insert an em-dash (|mdash|), a copyright symbol (|copy|), a non-breaking
space (|nbsp|), a backwards-not-equals (|bne|), and a captial omega (|Omega|).

.. |mdash| unicode:: 0x02014
.. |copy| unicode:: \\u00A9
.. |nbsp| unicode:: &#x000A0;
.. |bne| unicode:: U0003D U020E5
.. |Omega| unicode:: U+003A9
""",
"""\
<document source="test data">
    <paragraph>
        Insert an em-dash (
        <substitution_reference refname="mdash">
            mdash
        ), a copyright symbol (
        <substitution_reference refname="copy">
            copy
        ), a non-breaking
        space (
        <substitution_reference refname="nbsp">
            nbsp
        ), a backwards-not-equals (
        <substitution_reference refname="bne">
            bne
        ), and a captial omega (
        <substitution_reference refname="Omega">
            Omega
        ).
    <substitution_definition names="mdash">
        \u2014
    <substitution_definition names="copy">
        \u00A9
    <substitution_definition names="nbsp">
        \u00A0
    <substitution_definition names="bne">
        =
        \u20e5
    <substitution_definition names="Omega">
        \u03a9
"""],
["""
Bad input:

.. |empty| unicode::
.. |empty too| unicode:: .. comment doesn't count as content
.. |not hex| unicode:: 0xHEX
.. |not all hex| unicode:: UABCX
.. unicode:: not in a substitution definition
""",
"""\
<document source="test data">
    <paragraph>
        Bad input:
    <system_message level="3" line="4" source="test data" type="ERROR">
        <paragraph>
            Error in "unicode" directive:
            1 argument(s) required, 0 supplied.
        <literal_block xml:space="preserve">
            unicode::
    <system_message level="2" line="4" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "empty" empty or invalid.
        <literal_block xml:space="preserve">
            .. |empty| unicode::
    <system_message level="2" line="5" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "empty too" empty or invalid.
        <literal_block xml:space="preserve">
            .. |empty too| unicode:: .. comment doesn't count as content
    <substitution_definition names="not\\ hex">
        0xHEX
    <substitution_definition names="not\\ all\\ hex">
        UABCX
    <system_message level="3" line="8" source="test data" type="ERROR">
        <paragraph>
            Invalid context: the "unicode" directive can only be used within a substitution definition.
        <literal_block xml:space="preserve">
            .. unicode:: not in a substitution definition
"""],
["""
Testing comments and extra text.

Copyright |copy| 2003, |BogusMegaCorp (TM)|.

.. |copy| unicode:: 0xA9 .. copyright sign
.. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122
   .. with trademark sign
""",
"""\
<document source="test data">
    <paragraph>
        Testing comments and extra text.
    <paragraph>
        Copyright \n\
        <substitution_reference refname="copy">
            copy
         2003, \n\
        <substitution_reference refname="BogusMegaCorp (TM)">
            BogusMegaCorp (TM)
        .
    <substitution_definition names="copy">
        \u00A9
    <substitution_definition names="BogusMegaCorp\\ (TM)">
        BogusMegaCorp
        \u2122
"""],
["""
.. |too big for int| unicode:: 0x111111111111111111
.. |too big for unicode| unicode:: 0x11111111
""",
"""\
<document source="test data">
    <system_message level="3" line="2" source="test data" type="ERROR">
        <paragraph>
            Invalid character code: 0x111111111111111111
            ValueError: %s
        <literal_block xml:space="preserve">
            unicode:: 0x111111111111111111
    <system_message level="2" line="2" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "too big for int" empty or invalid.
        <literal_block xml:space="preserve">
            .. |too big for int| unicode:: 0x111111111111111111
    <system_message level="3" line="3" source="test data" type="ERROR">
        <paragraph>
            Invalid character code: 0x11111111
            %s
        <literal_block xml:space="preserve">
            unicode:: 0x11111111
    <system_message level="2" line="3" source="test data" type="WARNING">
        <paragraph>
            Substitution definition "too big for unicode" empty or invalid.
        <literal_block xml:space="preserve">
            .. |too big for unicode| unicode:: 0x11111111
""" % (unichr_exception_string, invalid_char_code)]
]


if __name__ == '__main__':
    unittest.main()