#!/usr/bin/env python3
# $Id: test_smartquotes.py 9480 2023-11-19 09:44:17Z milde $
# :Copyright: © 2011 Günter Milde.
# :Maintainer: docutils-develop@lists.sourceforge.net
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause

"""
Test module for universal.SmartQuotes transform.
"""

from pathlib import Path
import sys
import unittest

if __name__ == '__main__':
    # prepend the "docutils root" to the Python library path
    # so we import the local `docutils` package.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from docutils.frontend import get_default_settings
from docutils.parsers.rst import Parser
from docutils.transforms.universal import (SmartQuotes, FilterMessages,
                                           TestMessages)
from docutils.utils import new_document


class TransformTestCase(unittest.TestCase):
    """Apply the transforms listed in the module-level test tables.

    Every table maps a name to ``(transforms, cases)`` where `cases` is
    a sequence of ``[input, expected]`` pairs; `expected` is compared
    with ``document.pformat()`` after the transforms have been applied.
    """

    maxDiff = None

    def _check_cases(self, label, tests, parser, settings, *,
                     filter_messages=False):
        """Run all cases of one test table, each in its own sub-test.

        :label: name of the table, used as prefix of the sub-test id.
        :tests: mapping ``name -> (transforms, cases)``.
        :parser: parser instance used to parse every case input.
        :settings: settings object; every document gets its own copy.
        :filter_messages: if true, also add `FilterMessages`
            (with priority 890) to the transforms to apply.
        """
        for name, (transforms, cases) in tests.items():
            for casenum, (case_input, case_expected) in enumerate(cases):
                with self.subTest(id=f'{label}[{name!r}][{casenum}]'):
                    document = new_document('test data', settings.copy())
                    parser.parse(case_input, document)
                    # Don't do a ``populate_from_components()`` because that
                    # would enable the Transformer's default transforms.
                    document.transformer.add_transforms(transforms)
                    document.transformer.add_transform(TestMessages)
                    if filter_messages:
                        # Filter with increased priority: call later, so
                        # that messages added by `TestMessages` are
                        # filtered, too.
                        document.transformer.add_transform(FilterMessages,
                                                           890)
                    document.transformer.apply_transforms()
                    output = document.pformat()
                    self.assertEqual(case_expected, output)

    def test_transforms(self):
        """Check all four test tables.

        The same settings object is modified step by step, so the order
        of the calls matters: a value set for one table stays in effect
        for the following tables unless it is overwritten.
        """
        parser = Parser()
        settings = get_default_settings(Parser)
        settings.warning_stream = ''
        settings.smart_quotes = True
        settings.trim_footnote_ref_space = True
        self._check_cases('totest', totest, parser, settings)

        settings.language_code = 'de'
        self._check_cases('totest_de', totest_de, parser, settings,
                          filter_messages=True)

        settings.smart_quotes = 'alternative'
        self._check_cases('totest_de_alt', totest_de_alt, parser, settings)

        settings.smart_quotes = True
        settings.smartquotes_locales = [('de', '«»()'), ('nl', '„”’’')]
        self._check_cases('totest_locales', totest_locales, parser, settings)


# Test tables: ``name -> (transforms, [[input, expected], ...])``.
#
# NOTE(review): the expected values are compared with the result of
# ``document.pformat()`` but contain plain text only -- no element tags
# and no indentation.  They look as if markup and line breaks were lost
# in transit; the line breaks below are reconstructed.
# TODO: restore the expected values from the upstream file before
# relying on these cases.
# NOTE(review): the "thin space", "em space" and "NBSP" samples contain
# ordinary spaces (U+0020) here -- presumably the original characters
# were normalized; TODO confirm.
totest = {}
totest_de = {}
totest_de_alt = {}
totest_locales = {}

totest['smartquotes'] = ((SmartQuotes,), [
["""\
Test "smart quotes", 'secondary smart quotes',
"'nested' smart" quotes
-- and ---also long--- dashes.
""",
"""\
Test “smart quotes”, ‘secondary smart quotes’,
“‘nested’ smart” quotes
– and —also long— dashes.
"""],
[r"""Escaped \"ASCII quotes\" and \'secondary ASCII quotes\'.
""",
"""\
Escaped "ASCII quotes" and 'secondary ASCII quotes'.
"""],
["""\
Do not "educate" quotes ``inside "literal" text`` and ::

  "literal" blocks.

.. role:: python(code)
   :class: python

Keep quotes straight in code and math:
:code:`print "hello"` :python:`print("hello")` :math:`1' 12"`.

.. code::

   print("hello")

.. math::

   f'(x) = df(x)/dx
""",
"""\
Do not “educate” quotes \n\
inside "literal" text
 and
"literal" blocks.
Keep quotes straight in code and math:
print "hello"
 \n\
print("hello")
 \n\
1' 12"
.
print("hello")
f'(x) = df(x)/dx
"""],
["""\
Closing quotes, if preceded by
wor"d char's
or punctuation:"a",'a';'a' (TODO: opening quotes if followed by word-char?).

Opening quotes after
normal space "a" 'a',
thin space "a" 'a',
em space "a" 'a',
NBSP "a" 'a',
ZWSP\u200B"a" and\u200B'a',
ZWNJ\u200C"a" and\u200C'a',
escaped space\\ "a" and\\ 'a',
hyphen -"a", -'a'
en dash –"a"–'a',
em dash —"a"—'a'.

opening brackets ("a") ('a') ["a"] ['a'] {"a"} {'a'}

But not if followed by (optional punctuation and) whitespace:
"-", "–", "—", "(", "a[", "{"
'-', '–', '—', '((', '[', '{'
""",
"""\
Closing quotes, if preceded by
wor”d char’s
or punctuation:”a”,’a’;’a’ (TODO: opening quotes if followed by word-char?).
Opening quotes after
normal space “a” ‘a’,
thin space “a” ‘a’,
em space “a” ‘a’,
NBSP “a” ‘a’,
ZWSP\u200B“a” and\u200B‘a’,
ZWNJ\u200C“a” and\u200C‘a’,
escaped space“a” and‘a’,
hyphen -“a”, -‘a’
en dash –“a”–‘a’,
em dash —“a”—‘a’.
opening brackets (“a”) (‘a’) [“a”] [‘a’] {“a”} {‘a’}
But not if followed by (optional punctuation and) whitespace:
“-”, “–”, “—”, “(”, “a[”, “{”
‘-’, ‘–’, ‘—’, ‘((’, ‘[’, ‘{’
"""],
["""\
Quotes and inline-elements:

* Around "_`targets`", "*emphasized*" or "``literal``" text
  and links to "targets_".

* Inside *"emphasized"* or other `inline "roles"`

Do not drop characters from intra-word inline markup like
*re*\\ ``Structured``\\ *Text*.
""",
"""\
Quotes and inline-elements:
Around “
targets
”, “
emphasized
” or “
literal
” text
and links to “
targets
”.
Inside \n\
“emphasized”
 or other \n\
inline “roles”
Do not drop characters from intra-word inline markup like
re
Structured
Text
.
"""],
["""\
Do not convert context-character at inline-tag boundaries
(in French, smart quotes expand to two characters).

.. class:: language-fr-ch-x-altquot

  Around "_`targets`", "*emphasized*" or "``literal``" text
  and links to "targets_".

  Inside *"emphasized"* or other `inline "roles"`:
  (``"string"``), (``'string'``), *\"betont\"*, \"*betont*".

Do not drop characters from intra-word inline markup like
*re*\\ ``Structured``\\ *Text*.
""",
"""\
Do not convert context-character at inline-tag boundaries
(in French, smart quotes expand to two characters).
Around «\u202f
targets
\u202f», «\u202f
emphasized
\u202f» or «\u202f
literal
\u202f» text
and links to «\u202f
targets
\u202f».
Inside \n\
«\u202femphasized\u202f»
 or other \n\
inline «\u202froles\u202f»
:
(
"string"
), (
'string'
), \n\
«\u202fbetont\u202f»
, «\u202f
betont
\u202f».
Do not drop characters from intra-word inline markup like
re
Structured
Text
.
"""],
[r"""
Docutils escape mechanism uses the backslash:

\Remove \non-escaped \backslashes\:
\item \newline \tab \" \' \*.

\ Remove-\ escaped-\ white\ space-\
including-\ newlines.

\\Keep\\escaped\\backslashes\\
(but\\only\\one).

\\ Keep \\ space\\ around \\ backslashes.

Keep backslashes ``\in\ literal``,
:math:`in \mathrm{math}`, and
:code:`in\ code`.

Test around inline elements:\ [*]_
*emphasized*, H\ :sub:`2`\ O and :math:`x^2`

*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. [*] and footnotes
""",
"""\
Docutils escape mechanism uses the backslash:
Remove non-escaped backslashes:
item newline tab " \' *.
Remove-escaped-whitespace-including-newlines.
\\Keep\\escaped\\backslashes\\
(but\\only\\one).
\\ Keep \\ space\\ around \\ backslashes.
Keep backslashes \n\
\\in\\ literal
, \n\
in \\mathrm{math}
, and \n\
in\\ code
.
Test around inline elements:
emphasized
, H
2
O and \n\
x^2
emphasized
, H
2
O and \n\
x^2
and footnotes
"""],
[r"""
Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
with backslash-escaped whitespace, including new\
lines.
""",
"""\
Character-level m
a
r
k
u
p
with backslash-escaped whitespace, including newlines.
"""],
["""\
.. class:: language-de

German "smart quotes" and 'secondary smart quotes'.

.. class:: language-en-UK-x-altquot

British "primary quotes" use single and
'secondary quotes' double quote signs.

.. class:: language-foo

"Quoting style" for unknown languages is 'ASCII'.

.. class:: language-de-x-altquot

Alternative German "smart quotes" and 'secondary smart quotes'.
""",
"""\
German „smart quotes“ and ‚secondary smart quotes‘.
British ‘primary quotes’ use single and
“secondary quotes” double quote signs.
"Quoting style" for unknown languages is 'ASCII'.
Alternative German »smart quotes« and ›secondary smart quotes‹.
No smart quotes defined for language "foo".
"""],
])

totest_de['smartquotes'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.

.. class:: language-en

English "smart quotes" and 'secondary smart quotes'.
""",
"""\
German „smart quotes“ and ‚secondary smart quotes‘.
English “smart quotes” and ‘secondary smart quotes’.
"""],
])

totest_de_alt['smartquotes'] = ((SmartQuotes,), [
["""\
Alternative German "smart quotes" and 'secondary smart quotes'.

In this case, the apostrophe isn't a closing secondary quote!

.. class:: language-en-UK

British "quotes" use single and 'secondary quotes' double quote signs
(there are no alternative quotes defined).

.. class:: language-ro

Romanian "smart quotes" and 'secondary' smart quotes.
""",
"""\
Alternative German »smart quotes« and ›secondary smart quotes‹.
In this case, the apostrophe isn’t a closing secondary quote!
British ‘quotes’ use single and “secondary quotes” double quote signs
(there are no alternative quotes defined).
Romanian „smart quotes” and «secondary» smart quotes.
"""],
])

totest_locales['smartquotes'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.

.. class:: language-nl

Dutch "smart quotes" and 's Gravenhage (leading apostrophe).
""",
"""\
German «smart quotes» and (secondary smart quotes).
Dutch „smart quotes” and ’s Gravenhage (leading apostrophe).
"""],
])


if __name__ == '__main__':
    unittest.main()