#!/usr/bin/env python3
# :Copyright: © 2020 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
"""
Tests for inline markup in CommonMark parsers
Cf. the `CommonMark Specification <https://spec.commonmark.org/>`__
"""
from pathlib import Path
import sys
import unittest
if __name__ == '__main__':
    # When executed as a script, put the fourth ancestor directory of this
    # file (the "docutils root") first on the module search path, so the
    # `docutils` imports that follow resolve to the local package.
    docutils_root = Path(__file__).resolve().parents[3]
    sys.path.insert(0, str(docutils_root))
from docutils.frontend import get_default_settings
from docutils.parsers.recommonmark_wrapper import Parser
from docutils.utils import new_document
class RecommonmarkParserTestCase(unittest.TestCase):
    """Feed every sample of the module-level `totest` mapping to `Parser`."""

    def test_parser(self):
        """Parse each sample input and compare the formatted document.

        A single parser instance and a single default-settings object are
        created up front.  Every sample gets a fresh document built from a
        copy of those settings and runs inside its own ``subTest``, labelled
        with the group name and the sample's index within the group.
        """
        md_parser = Parser()
        defaults = get_default_settings(Parser)
        for group, samples in totest.items():
            for index, sample in enumerate(samples):
                # Each sample is an (input, expected output) pair.
                source, expected = sample
                with self.subTest(id=f'totest[{group!r}][{index}]'):
                    doc = new_document('test data', defaults.copy())
                    md_parser.parse(source, doc)
                    self.assertEqual(expected, doc.pformat())
# Test data, keyed by group name.  Each group is a list of
# [CommonMark input, expected output] pairs; `test_parser()` compares the
# second item with ``document.pformat()`` of the parsed first item.
# NOTE(review): the expected strings in this copy hold bare text only (no
# element tags, no indentation) -- it looks like markup was stripped from
# them; confirm against the upstream file before relying on these values.
totest = {}

# Emphasis written with single ``*`` or ``_`` delimiters.
totest['emphasis'] = [
# Both delimiter characters, one per line.
["""\
*emphasis*
_also emphasis_
""",
"""\
emphasis
\n\
also emphasis
"""],
# Emphasis in the middle of a word.
["""\
Partially*emphasised*word.
""",
"""\
Partially
emphasised
word.
"""],
# Emphasis spanning a line break.
["""\
*emphasized sentence
across lines*
""",
"""\
emphasized sentence
across lines
"""],
# Opening asterisk without a closing one: the expected text keeps it.
["""\
*no emphasis without closing asterisk
""",
"""\
*no emphasis without closing asterisk
"""],
# Raw-string input (starts with a newline, backslashes are passed to the
# parser unchanged): escaped and unbalanced delimiters.
[r"""
No markup when \*escaped or unbalanced *.
What about *this**?
Unbalanced _markup__ is kept as-is without warning.
""",
"""\
No markup when *escaped or unbalanced *.
What about \n\
this
*?
Unbalanced \n\
markup
_ is kept as-is without warning.
"""],
# Backslash-escaped asterisks as the emphasized content itself.
[r"""
Emphasized asterisk: *\**
Emphasized double asterisk: *\*\**
""",
"""\
Emphasized asterisk: \n\
*
Emphasized double asterisk: \n\
**
"""],
]
# Strong emphasis written with doubled ``**`` or ``__`` delimiters.
# Each entry is an [input, expected output] pair.
totest['strong'] = [
# Both delimiter characters, one per line.
["""\
**strong**
__also strong__
""",
"""\
strong
\n\
also strong
"""],
# Non-raw string: each ``\\`` below is a single backslash in the input,
# i.e. the asterisks inside the strong span are backslash-escaped.
["""\
Strong asterisk must be escaped **\\***
Strong double asterisk: **\\*\\***
""",
"""\
Strong asterisk must be escaped \n\
*
Strong double asterisk: \n\
**
"""],
# Opening delimiter without a closing one: the expected text keeps it.
["""\
**not strong without closing asterisks
""",
"""\
**not strong without closing asterisks
"""],
]
# Inline literals ("code spans") written with backquotes.
# Each entry is an [input, expected output] pair.  Inputs given as raw
# strings (``r"""``) start with a newline and pass backslashes to the
# parser unchanged; the non-raw expected strings spell one literal
# backslash as ``\\``.
totest['literal'] = [
# Two single-backquote code spans in one paragraph.
["""\
Inline `literals` are called `code spans` in CommonMark.
""",
"""\
Inline \n\
literals
are called \n\
code spans
in CommonMark.
"""],
# Backslash before an asterisk inside a code span: expected text keeps
# the backslash.
[r"""
`\*literal`
""",
"""\
\\*literal
"""],
# Backslash in the middle of a double-backquote code span.
[r"""
``lite\ral``
""",
"""\
lite\\ral
"""],
# Backslash directly before the closing backquotes.
[r"""
``literal\``
""",
"""\
literal\\
"""],
# Code span directly after an ASCII apostrophe and after U+2019.
["""\
l'``literal`` and l\u2019``literal`` with apostrophe
""",
"""\
l'
literal
and l\u2019
literal
with apostrophe
"""],
# Code span enclosed in various quote pairs (ASCII, U+2018/U+2019,
# U+201C/U+201D, U+00AB/U+00BB).
["""\
quoted '``literal``', quoted "``literal``",
quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
quoted \xab``literal``\xbb
""",
"""\
quoted '
literal
', quoted "
literal
",
quoted \u2018
literal
\u2019, quoted \u201c
literal
\u201d,
quoted \xab
literal
\xbb
"""],
# The same quote pairs placed inside the code span.
["""\
``'literal'`` with quotes, ``"literal"`` with quotes,
``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
``\xabliteral\xbb`` with quotes
""",
"""\
'literal'
with quotes, \n\
"literal"
with quotes,
\u2018literal\u2019
with quotes, \n\
\u201cliteral\u201d
with quotes,
\xabliteral\xbb
with quotes
"""],
# Closing backquotes followed directly by text, plus unbalanced
# backquote/apostrophe and asterisk markup on the second line.
[r"""
``literal ``no literal
No warning for `standalone TeX quotes' or other *unbalanced markup**.
""",
"""\
literal \n\
no literal
No warning for `standalone TeX quotes\' or other \n\
unbalanced markup
*.
"""],
# Opening backquotes without closing ones: the expected text keeps them.
["""\
``not literal without closing backquotes
""",
"""\
``not literal without closing backquotes
"""],
# Code span immediately followed by a letter.
[r"""
Python ``list``s use square bracket syntax.
""",
"""\
Python \n\
list
s use square bracket syntax.
"""],
# A lone double backquote followed by a blank: expected text is unchanged.
[r"""
Blank after opening `` not allowed.
""",
"""\
Blank after opening `` not allowed.
"""],
# No blank between the closing backquotes and the following text.
[r"""
no blank ``after closing``still ends a literal.
""",
"""\
no blank \n\
after closing
still ends a literal.
"""],
]
# Bracketed references resolved through a separate ``[label]: /uri``
# definition line.  Each entry is an [input, expected output] pair.
totest['references'] = [
# Single-word reference with its definition.
["""\
[ref]
[ref]: /uri
""",
"""\
ref
"""],
# Fails with recommonmark 0.6.0:
# ["""\
# Inline image ![foo *bar*]
# in a paragraph.
#
# [foo *bar*]: train.jpg "train & tracks"
# """,
# """\
#
#
# Inline image \n\
#
# \n\
# in a paragraph.
# """],
# Reference label containing a blank.
["""\
[phrase reference]
[phrase reference]: /uri
""",
"""\
phrase reference
"""],
# Reference directly attached to the preceding word and followed by a
# full stop.
["""\
No whitespace required around a[phrase reference].
[phrase reference]: /uri
""",
"""\
No whitespace required around a
phrase reference
.
"""],
# Reference text broken across two lines; the definition label uses a
# blank where the reference has the line break.
["""\
[phrase reference
across lines]
[phrase reference across lines]: /uri
""",
"""\
phrase reference
across lines
"""],
]
# Links whose URI is appended in parentheses right after the bracketed
# text.  Each entry is an [input, expected output] pair.
totest['appended_uris'] = [
# Bracketed text immediately followed by an absolute URI.
["""\
[anonymous reference](http://example.com)
""",
"""\
anonymous reference
"""],
# NOTE(review): the input below has two consecutive blanks after "Inline
# image" and contains no image markup -- presumably the image syntax was
# lost from this copy; confirm against the upstream file.
["""\
Inline image  more text.
""",
"""\
Inline image \n\
more text.
"""],
# recommonmark 0.6.0 drops the "title"
# ["""\
# Inline image  more text.
# """,
# """\
#
#
# Inline image \n\
#
# more text.
# """],
# URI on the line after the bracketed text: expected text is unchanged.
["""\
[URI must follow immediately]
(http://example.com)
""",
"""\
[URI must follow immediately]
(http://example.com)
"""],
# Relative URI identical to the link text.
["""\
Relative URIs' reference text can't be omitted:
[reference](reference)
""",
"""\
Relative URIs' reference text can't be omitted:
reference
"""],
]
totest['standalone_hyperlink'] = [
["""\
CommonMark calls standalone hyperlinks
like "autolinks".
""",
"""\
CommonMark calls standalone hyperlinks
like \n\
http://example.com
"autolinks".
"""],
]
totest['raw_html'] = [
["""\
foo bar
""",
"""\
foo \n\
bar
"""],
["""\
foo
bar
and
""",
"""\
foo \n\
bar
and \n\
"""],
["""\
Hard line breaks are not supported by Docutils.
"recommonmark 0.6.0" converts both, invisible \n\
(two or more trailing spaces) nor visible\\
(trailing backslash) to raw HTML.
""",
"""\
Hard line breaks are not supported by Docutils.
"recommonmark 0.6.0" converts both, invisible
(two or more trailing spaces) nor visible
(trailing backslash) to raw HTML.
"""],
]
# Inline markup applied to single characters inside one word.
# Each entry is an [input, expected output] pair.
totest['markup_recognition_rules'] = [
# Raw-string input (starts with a newline): emphasis, strong and code
# span on single letters; the expected text keeps ``_u_p`` unchanged.
[r"""
Character-level m*a***r**`k`_u_p
works except for underline.
""",
"""\
Character-level m
a
r
k
_u_p
works except for underline.
"""],
]
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()