import re
import string
from collections import abc

__version__ = "2.0.0a1"

# Matches either an HTML comment or a single tag. ``re.DOTALL`` lets the
# comment alternative span line breaks, so a multi-line comment that
# contains ``>`` is removed as a whole instead of being cut at the first
# ``>`` by the tag alternative.
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)", re.DOTALL)


class Markup(str):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup("Hello, <em>World</em>!")
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape("Hello, <em>World</em>!")
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of :class:`str`. It has the same methods, but
    escapes their arguments and returns a ``Markup`` instance.

    >>> Markup("<em>%s</em>") % ("foo & bar",)
    Markup('<em>foo &amp; bar</em>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(cls, base="", encoding=None, errors="strict"):
        """Create the string, unwrapping ``base.__html__()`` if present.

        ``encoding`` and ``errors`` are only forwarded to :class:`str`
        when ``encoding`` is given.
        """
        if hasattr(base, "__html__"):
            base = base.__html__()
        if encoding is None:
            return super().__new__(cls, base)
        return super().__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return ``self``; the value is already marked safe."""
        return self

    def __add__(self, other):
        """Concatenate, escaping ``other`` first."""
        if isinstance(other, str) or hasattr(other, "__html__"):
            return self.__class__(super().__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Reflected concatenation: escape ``other``, then append ``self``."""
        if isinstance(other, str) or hasattr(other, "__html__"):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the ``Markup`` type."""
        if isinstance(num, int):
            return self.__class__(super().__mul__(num))
        return NotImplemented

    __rmul__ = __mul__

    def __mod__(self, arg):
        """``%``-format the markup, escaping every interpolated value.

        Each argument is wrapped in :class:`_MarkupEscapeHelper` so that
        it is escaped at the moment ``str.__mod__`` converts it.
        """
        if isinstance(arg, tuple):
            # A tuple of positional arguments: wrap each one.
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        elif hasattr(type(arg), "__getitem__") and not isinstance(arg, str):
            # A subscriptable argument (e.g. a mapping for ``%(name)s``):
            # wrap it whole; the helper escapes the looked-up items.
            arg = _MarkupEscapeHelper(arg, self.escape)
        else:
            # A single plain value. The helper defines ``__getitem__``, so
            # passing it bare would make ``str.__mod__`` treat it as a
            # mapping and skip the "not all arguments converted" check.
            # Wrapping it in a one-element tuple keeps str's semantics.
            arg = (_MarkupEscapeHelper(arg, self.escape),)
        return self.__class__(super().__mod__(arg))

    def __repr__(self):
        return f"{self.__class__.__name__}({super().__repr__()})"

    def join(self, seq):
        return self.__class__(super().join(map(self.escape, seq)))

    join.__doc__ = str.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, super().split(*args, **kwargs)))

    split.__doc__ = str.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, super().rsplit(*args, **kwargs)))

    rsplit.__doc__ = str.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, super().splitlines(*args, **kwargs)))

    splitlines.__doc__ = str.splitlines.__doc__

    def unescape(self):
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        'Main » <em>About</em>'
        """
        from html import unescape

        return unescape(str(self))

    def striptags(self):
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup("Main &raquo;\t<em>About</em>").striptags()
        'Main » About'
        """
        stripped = " ".join(_striptags_re.sub("", self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_simple_escaping_wrapper(name):  # noqa: B902
        # Class-body helper (deleted below): builds a method that escapes
        # its string / ``__html__`` arguments, calls the ``str`` method of
        # the same name, and re-wraps the result in the instance's class.
        orig = getattr(str, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, kwargs.items(), self.escape)
            return self.__class__(orig(self, *args, **kwargs))

        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in (
        "__getitem__",
        "capitalize",
        "title",
        "lower",
        "upper",
        "replace",
        "ljust",
        "rjust",
        "lstrip",
        "rstrip",
        "center",
        "strip",
        "translate",
        "expandtabs",
        "swapcase",
        "zfill",
    ):
        locals()[method] = make_simple_escaping_wrapper(method)

    del method, make_simple_escaping_wrapper

    def partition(self, sep):
        return tuple(map(self.__class__, super().partition(self.escape(sep))))

    partition.__doc__ = str.partition.__doc__

    def rpartition(self, sep):
        return tuple(map(self.__class__, super().rpartition(self.escape(sep))))

    rpartition.__doc__ = str.rpartition.__doc__

    def format(self, *args, **kwargs):
        """``str.format`` equivalent that escapes every formatted field.

        Formatting is delegated to :class:`EscapeFormatter`, and the
        result is wrapped in the instance's class.
        """
        formatter = EscapeFormatter(self.escape)
        kwargs = _MagicFormatMapping(args, kwargs)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def __html_format__(self, format_spec):
        """Return ``self`` when used as a format field.

        :raises ValueError: if a non-empty ``format_spec`` is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification for Markup.")
        return self


class _MagicFormatMapping(abc.Mapping):
    """This class implements a dummy wrapper to fix a bug in the Python
    standard library for string formatting.

    See http://bugs.python.org/issue13598 for information about why
    this is necessary.
    """

    def __init__(self, args, kwargs):
        self._args = args
        self._kwargs = kwargs
        # Index of the next positional argument handed out for an empty
        # field name.
        self._last_index = 0

    def __getitem__(self, key):
        if key == "":
            # Empty field name: consume the next positional argument.
            idx = self._last_index
            self._last_index += 1
            try:
                return self._args[idx]
            except LookupError:
                # No such positional argument; fall back to looking the
                # index up as a keyword below.
                pass
            key = str(idx)
        return self._kwargs[key]

    def __iter__(self):
        return iter(self._kwargs)

    def __len__(self):
        return len(self._kwargs)


class EscapeFormatter(string.Formatter):
    """A :class:`string.Formatter` that passes every formatted field
    through the ``escape`` callable given at construction.
    """

    def __init__(self, escape):
        super().__init__()
        self.escape = escape

    def format_field(self, value, format_spec):
        """Format one field and return it escaped, as a plain ``str``.

        Objects with ``__html_format__`` format themselves. Objects with
        only ``__html__`` are used as-is and reject any format spec. All
        other values go through the default formatting.

        :raises ValueError: if ``format_spec`` is non-empty and ``value``
            defines ``__html__`` but not ``__html_format__``.
        """
        if hasattr(value, "__html_format__"):
            rv = value.__html_format__(format_spec)
        elif hasattr(value, "__html__"):
            if format_spec:
                raise ValueError(
                    f"Format specifier {format_spec} given, but {type(value)} does not"
                    " define __html_format__. A class that defines __html__ must define"
                    " __html_format__ to work with format specifiers."
                )
            rv = value.__html__()
        else:
            # We need to make sure the format spec is str here as
            # otherwise the wrong callback methods are invoked.
            rv = string.Formatter.format_field(self, value, str(format_spec))
        return str(self.escape(rv))


def _escape_argspec(obj, iterable, escape):
    """Helper for various string-wrapped functions.

    For each ``(key, value)`` pair from ``iterable``, replace
    ``obj[key]`` with ``escape(value)`` when the value is a string or
    defines ``__html__``. ``obj`` is modified in place and returned.
    """
    for key, value in iterable:
        if isinstance(value, str) or hasattr(value, "__html__"):
            obj[key] = escape(value)
    return obj


class _MarkupEscapeHelper:
    """Helper for :meth:`Markup.__mod__`.

    Wraps one format argument so that ``%s`` and ``%r`` conversions
    yield escaped text, while numeric conversions reach the wrapped
    object unchanged.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Wrap the looked-up item as well, so it is escaped in turn.
        return _MarkupEscapeHelper(self.obj[item], self.escape)

    def __str__(self):
        return str(self.escape(self.obj))

    def __repr__(self):
        return str(self.escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)


# circular import
try:
    from ._speedups import escape
    from ._speedups import escape_silent
    from ._speedups import soft_str
    from ._speedups import soft_unicode
except ImportError:
    from ._native import escape
    from ._native import escape_silent  # noqa: F401
    from ._native import soft_str  # noqa: F401
    from ._native import soft_unicode  # noqa: F401