#! /usr/bin/env python3

# $Id: test_character_level_inline_markup.py 9425 2023-06-30 14:56:47Z milde $
# Author: David Goodger
# Copyright: This module has been placed in the public domain.

"""
Tests for inline markup in docutils/parsers/rst/states.py
with the "character-level-inline-markup" setting.

Experimental.
"""

from pathlib import Path
import sys
import unittest

if __name__ == '__main__':
    # prepend the "docutils root" to the Python library path
    # so we import the local `docutils` package.
    sys.path.insert(0, str(Path(__file__).resolve().parents[3]))

from docutils.frontend import get_default_settings
from docutils.parsers.rst import Parser
from docutils.utils import new_document


class ParserTestCase(unittest.TestCase):
    """Feed every sample in `totest` to the rST parser and check the result."""

    def test_parser(self):
        """Parse each sample input and compare `pformat()` output.

        One shared `Parser` and one base settings object are built up front;
        every sample gets a fresh document created from a copy of the
        settings.  Each sample runs in its own sub-test, labelled with its
        `totest` key and list index.
        """
        settings = get_default_settings(Parser)
        settings.character_level_inline_markup = True
        # NOTE(review): presumably an empty string discards reporter output;
        # confirm against the Docutils "warning_stream" setting.
        settings.warning_stream = ''
        parser = Parser()

        for name, cases in totest.items():
            for casenum, case in enumerate(cases):
                case_input, case_expected = case
                with self.subTest(id=f'totest[{name!r}][{casenum}]'):
                    document = new_document('test data', settings.copy())
                    parser.parse(case_input, document)
                    self.assertEqual(case_expected, document.pformat())


# Maps a group name to a list of [input, expected output] pairs.
#
# NOTE(review): the literals below are kept character-for-character as found.
# The expected-output strings look like they have lost their element markup
# and original line breaks -- verify against the upstream fixture data.
totest = {}

totest['emphasis'] = [
    [r"""some punctuation is allowed around inline markup, e.g. /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters), (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs) *emphasis*., *emphasis*,, *emphasis*!, and *emphasis*\ (closing delimiters), With simple-inline-markup also )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs), x*2* or 2*x* (alphanumeric char before), but not (*), [*], '*' or '"*"' ("quoted" start-string), \*args or * (escaped; whitespace behind start-string), or *the\* *stars\* *inside* (escaped; whitespace before end-string). However, '*args' triggers a warning. Also *this**. """,
     """\ some punctuation is allowed around inline markup, e.g.
/ emphasis /, - emphasis -, and : emphasis : (delimiters), ( emphasis ), [ emphasis ], < emphasis >, { emphasis } (open/close pairs) emphasis ., \n\ emphasis ,, \n\ emphasis !, and \n\ emphasis (closing delimiters), With simple-inline-markup also ) emphasis (, ] emphasis [, > emphasis >, } emphasis { (close/open pairs), x 2 or 2 x (alphanumeric char before), but not (*), [*], '*' or '"*"' ("quoted" start-string), *args or * (escaped; whitespace behind start-string), or \n\ the* *stars* *inside (escaped; whitespace before end-string). However, ' * args' triggers a warning. Inline emphasis start-string without end-string. Also \n\ this * . Inline emphasis start-string without end-string. """],
    [r""" Emphasized asterisk: *\** Emphasized double asterisk: *\*\** (requires two escape chars). """,
     """\ Emphasized asterisk: \n\ * Emphasized double asterisk: \n\ ** (requires two escape chars). """],
]

totest['strong'] = [
    [r""" (**strong**) but not (**) or '(** ' However, '**kwargs' and x**2 are recognized as strong markup and \**kwargs or ** as emphasized. """,
     """\ ( strong ) but not (**) or '(** ' However, ' kwargs' and x 2 are recognized as strong markup and * kwargs or * as emphasized. """],
    ["""\ Strong asterisk: **\\*** and strong double asterisk: **\\*\\*** require escaping with simple-inline-markup. """,
     """\ Strong asterisk: \n\ * \n\ and strong double asterisk: \n\ ** \n\ require escaping with simple-inline-markup. """],
]

totest['literal'] = [
    ["""\ With simple-inline-markup, this is ```interpreted text``` in backquotes! """,
     """\ With simple-inline-markup, this is \n\ `interpreted text ` in backquotes! """],
    ["""\ ``literal without closing backquotes """,
     """\ `` literal without closing backquotes Inline literal start-string without end-string. """],
    [r""" Python ``list``s use square bracket syntax. """,
     """\ Python \n\ list s use square bracket syntax.
"""],
]

totest['references'] = [
    ["""\ ref_, r_, r_e-f_, -ref_, and anonymousref__, beware of _ref_ or __attr__ or object.__attr__ """,
     """\ ref , \n\ r , \n\ r_e-f , - ref , and \n\ anonymousref , beware of _ ref or __ attr or object.__ attr """],
]

totest['embedded_uris'] = [
    [r""" Escape chars in URIs: ``_ ``__ ``__ """,
     """\ Escape chars in URIs: reference:1 anonymous\\call anonymous_call """],
]

totest['inline_targets'] = [
    ["""\ This isn't a _target; targets require backquotes. With simple-inline-markup, _`this`_ is a a target followed by an underscore. """,
     """\ This isn't a _target; targets require backquotes. With simple-inline-markup, \n\ this _ is a a target followed by an underscore. """],
]

totest['footnote_reference'] = [
    ["""\ Adjacent footnote refs are possible with simple-inline-markup: [*]_[#label]_ [#]_[2]_ [1]_[*]_ .. [#] test1 .. [*] test2 """,
     """\ Adjacent footnote refs are possible with simple-inline-markup: \n\ 2 \n\ 1 test1 test2 """],
]

totest['citation_reference'] = [
    ["""\ Adjacent citation refs are possible with simple-inline-markup: [citation]_[CIT1]_ """,
     """\ Adjacent citation refs are possible with simple-inline-markup: citation CIT1 """],
]

totest['substitution_references'] = [
    ["""\ |sub|ref """,
     """\ sub ref """],
]

totest['standalone_hyperlink'] = [
    [r""" Valid URLs with escaped markup characters: http://example.com/\*content\*/whatever Invalid with the simple-inline-markup setting: http://example.com/\*content*/whatever http://example.com/rST_for_all.html """,
     """\ Valid URLs with escaped markup characters: http://example.com/*content*/whatever Invalid with the simple-inline-markup setting: http://example.com/*content * /whatever http://example.com/ rST_for all.html Inline emphasis start-string without end-string. """],
]

totest['markup_recognition_rules'] = [
    ["""\ __This__ is an anonymous reference with simple-inline-markup. """,
     """\ __ This is an anonymous reference with simple-inline-markup.
"""],
    [r""" Character-level m*a***r**``k``\ `u`:title:\p with backslash-escaped whitespace, including new\ lines. """,
     """\ Character-level m a r k u p with backslash-escaped whitespace, including newlines. """],
    ["""\ text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*. \u00bf*punctuation*? \u00a1*examples*!\xa0*no-break-space*\xa0. """,
     """\ text- separated \u2010 by \u2011 various \u2012 dashes \u2013 and \u2014 hyphens . \xbf punctuation ? \xa1 examples !\xa0 no-break-space \xa0. """],
    # Whitespace characters:
    ["""\ inline markup surrounded by various whitespace characters: *newline* or *space* or one of \xa0*NO-BREAK SPACE*\xa0, \u1680*OGHAM SPACE MARK*\u1680, \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, \u2000*EN QUAD*\u2000, \u2001*EM QUAD*\u2001, \u2002*EN SPACE*\u2002, \u2003*EM SPACE*\u2003, \u2004*THREE-PER-EM SPACE*\u2004, \u2005*FOUR-PER-EM SPACE*\u2005, \u2006*SIX-PER-EM SPACE*\u2006, \u2007*FIGURE SPACE*\u2007, \u2008*PUNCTUATION SPACE*\u2008, \u2009*THIN SPACE*\u2009, \u200a*HAIR SPACE*\u200a, \u202f*NARROW NO-BREAK SPACE*\u202f, \u205f*MEDIUM MATHEMATICAL SPACE*\u205f, \u3000*IDEOGRAPHIC SPACE*\u3000, \u2028*LINE SEPARATOR*\u2028 """,
     """\ inline markup surrounded by various whitespace characters: newline \n\ or \n\ space or one of \xa0 NO-BREAK SPACE \xa0, \u1680 OGHAM SPACE MARK \u1680, \u180e MONGOLIAN VOWEL SEPARATOR \u180e, \u2000 EN QUAD \u2000, \u2001 EM QUAD \u2001, \u2002 EN SPACE \u2002, \u2003 EM SPACE \u2003, \u2004 THREE-PER-EM SPACE \u2004, \u2005 FOUR-PER-EM SPACE \u2005, \u2006 SIX-PER-EM SPACE \u2006, \u2007 FIGURE SPACE \u2007, \u2008 PUNCTUATION SPACE \u2008, \u2009 THIN SPACE \u2009, \u200a HAIR SPACE \u200a, \u202f NARROW NO-BREAK SPACE \u202f, \u205f MEDIUM MATHEMATICAL SPACE \u205f, \u3000 IDEOGRAPHIC SPACE \u3000, LINE SEPARATOR """],
    ["""\ no inline markup due to whitespace inside and behind: * newline * * space * or one of *\xa0NO-BREAK SPACE\xa0* *\u1680OGHAM SPACE MARK\u1680* *\u2000EN
QUAD\u2000* *\u2001EM QUAD\u2001* *\u2002EN SPACE\u2002* *\u2003EM SPACE\u2003* *\u2004THREE-PER-EM SPACE\u2004* *\u2005FOUR-PER-EM SPACE\u2005* *\u2006SIX-PER-EM SPACE\u2006* *\u2007FIGURE SPACE\u2007* *\u2008PUNCTUATION SPACE\u2008* *\u2009THIN SPACE\u2009* *\u200aHAIR SPACE\u200a* *\u202fNARROW NO-BREAK SPACE\u202f* *\u205fMEDIUM MATHEMATICAL SPACE\u205f* *\u3000IDEOGRAPHIC SPACE\u3000* *\u2028LINE SEPARATOR\u2028* """,
     """\ no inline markup due to whitespace inside and behind: * newline * * space * or one of *\xa0NO-BREAK SPACE\xa0* *\u1680OGHAM SPACE MARK\u1680* *\u2000EN QUAD\u2000* *\u2001EM QUAD\u2001* *\u2002EN SPACE\u2002* *\u2003EM SPACE\u2003* *\u2004THREE-PER-EM SPACE\u2004* *\u2005FOUR-PER-EM SPACE\u2005* *\u2006SIX-PER-EM SPACE\u2006* *\u2007FIGURE SPACE\u2007* *\u2008PUNCTUATION SPACE\u2008* *\u2009THIN SPACE\u2009* *\u200aHAIR SPACE\u200a* *\u202fNARROW NO-BREAK SPACE\u202f* *\u205fMEDIUM MATHEMATICAL SPACE\u205f* *\u3000IDEOGRAPHIC SPACE\u3000* * LINE SEPARATOR * """],
    # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
    ["""\ "Quoted" markup start-string (matched openers & closers) -> no markup: '*' "*" (*) <*> [*] {*} ⁅*⁆ Some international quoting styles: ‘*’ “*” English, ..., „*“ ‚*‘ »*« ›*‹ German, Czech, ..., „*” «*» Romanian, “*„ ‘*‚ Greek, 「*」『*』traditional Chinese, ”*” ’*’ »*» ›*› Swedish, Finnish, „*” ‚*’ Polish, „*” »*« ’*’ Hungarian, But this is „*’ emphasized »*‹. """,
     """\ "Quoted" markup start-string (matched openers & closers) -> no markup: '*' "*" (*) <*> [*] {*} ⁅*⁆ Some international quoting styles: ‘*’ “*” English, ..., „*“ ‚*‘ »*« ›*‹ German, Czech, ..., „*” «*» Romanian, “*„ ‘*‚ Greek, 「*」『*』traditional Chinese, ”*” ’*’ »*» ›*› Swedish, Finnish, „*” ‚*’ Polish, „*” »*« ’*’ Hungarian, But this is „ ’ emphasized » ‹. """],
]


if __name__ == '__main__':
    unittest.main()