1import re
2import string
3from collections import abc
4
__version__ = "2.0.0a1"

# Matches either a whole HTML comment or a single tag.  ``re.DOTALL`` lets
# the ``.`` in the comment alternative cross newlines, so a comment that
# spans several lines is removed as one unit.  Without the flag such a
# comment only matched the tag alternative, which stops at the first ``>``
# and leaves the rest of the comment behind.
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)", re.DOTALL)
8
9
class Markup(str):
    """Text that is considered safe to place in an HTML or XML document.

    A value is ``Markup`` either because it was escaped or because it
    was explicitly declared safe. The constructor converts its argument
    to text and marks it safe *without* escaping it; use the
    :meth:`escape` class method when the text still has to be escaped.

    >>> Markup("Hello, <em>World</em>!")
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape("Hello, <em>World</em>!")
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    The ``__html__()`` protocol used by some frameworks is supported in
    both directions: ``Markup`` provides the method itself, and the
    constructor wraps the result of calling it on objects that have it.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    Being a :class:`str` subclass, it offers the usual string methods.
    The overridden ones escape their arguments and hand back a
    ``Markup`` instance.

    >>> Markup("<em>%s</em>") % ("foo & bar",)
    Markup('<em>foo &amp; bar</em>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(cls, base="", encoding=None, errors="strict"):
        # Objects implementing the ``__html__`` protocol supply their own
        # markup; wrap that instead of the object itself.
        if hasattr(base, "__html__"):
            base = base.__html__()
        # Only forward the decoding arguments when an encoding was given.
        str_args = (base,) if encoding is None else (base, encoding, errors)
        return super().__new__(cls, *str_args)

    def __html__(self):
        """Return the instance itself; it already is safe markup."""
        return self

    def __add__(self, other):
        """Concatenate with ``other`` after escaping it."""
        if not (isinstance(other, str) or hasattr(other, "__html__")):
            return NotImplemented
        escaped = self.escape(other)
        return self.__class__(super().__add__(escaped))

    def __radd__(self, other):
        """Concatenate the escaped ``other`` in front of this markup."""
        if not (isinstance(other, str) or hasattr(other, "__html__")):
            return NotImplemented
        escaped = self.escape(other)
        return escaped.__add__(self)

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the markup type."""
        if not isinstance(num, int):
            return NotImplemented
        repeated = super().__mul__(num)
        return self.__class__(repeated)

    __rmul__ = __mul__

    def __mod__(self, arg):
        """Apply ``%`` formatting with every argument wrapped for escaping."""
        # The helper escapes a value at the moment it is rendered.
        if isinstance(arg, tuple):
            wrapped = tuple(_MarkupEscapeHelper(item, self.escape) for item in arg)
        else:
            wrapped = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(super().__mod__(wrapped))

    def __repr__(self):
        class_name = self.__class__.__name__
        text_repr = super().__repr__()
        return f"{class_name}({text_repr})"

    def join(self, seq):
        escaped_items = map(self.escape, seq)
        joined = super().join(escaped_items)
        return self.__class__(joined)

    join.__doc__ = str.join.__doc__

    def split(self, *args, **kwargs):
        wrap = self.__class__
        pieces = super().split(*args, **kwargs)
        return [wrap(piece) for piece in pieces]

    split.__doc__ = str.split.__doc__

    def rsplit(self, *args, **kwargs):
        wrap = self.__class__
        pieces = super().rsplit(*args, **kwargs)
        return [wrap(piece) for piece in pieces]

    rsplit.__doc__ = str.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        wrap = self.__class__
        lines = super().splitlines(*args, **kwargs)
        return [wrap(line) for line in lines]

    splitlines.__doc__ = str.splitlines.__doc__

    def unescape(self):
        """Turn escaped markup back into plain text by replacing HTML
        entities with the characters they stand for.

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        'Main » <em>About</em>'
        """
        import html

        return html.unescape(str(self))

    def striptags(self):
        """Remove tags, collapse whitespace runs to single spaces, and
        :meth:`unescape` the result.

        >>> Markup("Main &raquo;   <em>About</em>").striptags()
        'Main » About'
        """
        without_tags = _striptags_re.sub("", self)
        collapsed = " ".join(without_tags.split())
        return Markup(collapsed).unescape()

    @classmethod
    def escape(cls, s):
        """Escape ``s`` with the module-level :func:`escape` and make
        sure the result is an instance of the class it was called on,
        so subclasses get their own type back.
        """
        escaped = escape(s)
        return escaped if escaped.__class__ is cls else cls(escaped)

    # Factory used only while the class body executes: it builds a method
    # that escapes string-like arguments, delegates to the ``str`` method
    # of the same name, and wraps the result in the markup class.
    def make_simple_escaping_wrapper(name):  # noqa: B902
        orig = getattr(str, name)

        def func(self, *args, **kwargs):
            positional = _escape_argspec(list(args), enumerate(args), self.escape)
            # Keyword values are escaped in place.
            _escape_argspec(kwargs, kwargs.items(), self.escape)
            result = orig(self, *positional, **kwargs)
            return self.__class__(result)

        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    # Install the wrappers directly into the class namespace.
    for _wrapped_name in (
        "__getitem__",
        "capitalize",
        "title",
        "lower",
        "upper",
        "replace",
        "ljust",
        "rjust",
        "lstrip",
        "rstrip",
        "center",
        "strip",
        "translate",
        "expandtabs",
        "swapcase",
        "zfill",
    ):
        locals()[_wrapped_name] = make_simple_escaping_wrapper(_wrapped_name)

    # Neither the loop variable nor the factory should become attributes.
    del _wrapped_name, make_simple_escaping_wrapper

    def partition(self, sep):
        wrap = self.__class__
        head, found, tail = super().partition(self.escape(sep))
        return (wrap(head), wrap(found), wrap(tail))

    def rpartition(self, sep):
        wrap = self.__class__
        head, found, tail = super().rpartition(self.escape(sep))
        return (wrap(head), wrap(found), wrap(tail))

    def format(self, *args, **kwargs):
        """Format like :meth:`str.format`, escaping every substituted
        field through :class:`EscapeFormatter`.
        """
        escaping_formatter = EscapeFormatter(self.escape)
        mapping = _MagicFormatMapping(args, kwargs)
        rendered = escaping_formatter.vformat(self, args, mapping)
        return self.__class__(rendered)

    def __html_format__(self, format_spec):
        """Return the markup unchanged; format specifications are not
        supported and raise :exc:`ValueError`.
        """
        if not format_spec:
            return self
        raise ValueError("Unsupported format specification for Markup.")
185
186
class _MagicFormatMapping(abc.Mapping):
    """Mapping wrapper that works around a string formatting bug in the
    Python standard library.

    Background on why it is needed is in
    http://bugs.python.org/issue13598.

    Lookups of the empty key are served from the positional arguments in
    order; every other key is looked up in the keyword arguments.
    """

    def __init__(self, args, kwargs):
        self._last_index = 0
        self._args = args
        self._kwargs = kwargs

    def __getitem__(self, key):
        auto_numbered = key == ""
        if not auto_numbered:
            return self._kwargs[key]

        # Each empty key consumes the next positional slot.
        position = self._last_index
        self._last_index = position + 1
        try:
            return self._args[position]
        except LookupError:
            # No positional argument left; fall back to a keyword
            # argument named after the index.
            pass
        return self._kwargs[str(position)]

    def __iter__(self):
        keyword_args = self._kwargs
        return iter(keyword_args)

    def __len__(self):
        keyword_args = self._kwargs
        return len(keyword_args)
216
217
class EscapeFormatter(string.Formatter):
    """A :class:`string.Formatter` that passes every formatted field
    through the ``escape`` callable given at construction.
    """

    def __init__(self, escape):
        self.escape = escape

    def format_field(self, value, format_spec):
        """Format one field and return the escaped result as ``str``.

        ``__html_format__`` is preferred when the value has it. A value
        with only ``__html__`` is accepted when no format specification
        is given; otherwise :exc:`ValueError` is raised. Any other value
        goes through the standard field formatting.
        """
        if hasattr(value, "__html_format__"):
            rendered = value.__html_format__(format_spec)
        elif not hasattr(value, "__html__"):
            # The format spec must be a plain str here, otherwise the
            # wrong callback methods are invoked.
            rendered = string.Formatter.format_field(self, value, str(format_spec))
        elif format_spec:
            raise ValueError(
                f"Format specifier {format_spec} given, but {type(value)} does not"
                " define __html_format__. A class that defines __html__ must define"
                " __html_format__ to work with format specifiers."
            )
        else:
            rendered = value.__html__()
        return str(self.escape(rendered))
238
239
def _escape_argspec(obj, iterable, escape):
    """Escape string-like entries of ``obj`` in place and return ``obj``.

    ``iterable`` yields ``(key, value)`` pairs. A value that is a
    ``str`` or has an ``__html__`` attribute is replaced in ``obj``
    under its key by ``escape(value)``; other values are left alone.
    """
    for slot, candidate in iterable:
        if not (isinstance(candidate, str) or hasattr(candidate, "__html__")):
            continue
        obj[slot] = escape(candidate)
    return obj
246
247
class _MarkupEscapeHelper:
    """Wrapper used by :meth:`Markup.__mod__`.

    It holds a value together with an escape callable and escapes the
    value when it is rendered as text. Item access returns another
    helper around the looked-up item; integer and float conversions go
    straight to the wrapped value.
    """

    def __init__(self, obj, escape):
        self.escape = escape
        self.obj = obj

    def __getitem__(self, item):
        looked_up = self.obj[item]
        return _MarkupEscapeHelper(looked_up, self.escape)

    def __str__(self):
        escaped = self.escape(self.obj)
        return str(escaped)

    def __repr__(self):
        # The repr text itself is escaped, not the wrapped object.
        escaped = self.escape(repr(self.obj))
        return str(escaped)

    def __int__(self):
        wrapped = self.obj
        return int(wrapped)

    def __float__(self):
        wrapped = self.obj
        return float(wrapped)
269
270
271# circular import
272try:
273    from ._speedups import escape
274    from ._speedups import escape_silent
275    from ._speedups import soft_str
276    from ._speedups import soft_unicode
277except ImportError:
278    from ._native import escape
279    from ._native import escape_silent  # noqa: F401
280    from ._native import soft_str  # noqa: F401
281    from ._native import soft_unicode  # noqa: F401
282