from __future__ import annotations

import re
from functools import lru_cache
from itertools import chain, count
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple

try:
    from lxml import etree
except ImportError:
    # lxml is required for subsetting SVG, but we prefer to delay the import error
    # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
    etree = None

from fontTools import ttLib
from fontTools.subset.util import _add_method
from fontTools.ttLib.tables.S_V_G_ import SVGDocument


__all__ = ["subset_glyphs"]


# Matches element ids of the form "glyph<decimal gid>"; group 1 is the gid.
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI map used by every compiled XPath expression below.
NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}
# Fully qualified ("{namespace}href") attribute name, as lxml exposes it.
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'


# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    # Compile the XPath expression `path` upfront (with the svg/xlink prefixes
    # from NAMESPACES bound), caching the result so the same compiled evaluator
    # is reused on multiple elements.
    # Must only be called after lxml has been successfully imported.
    return etree.XPath(path, namespaces=NAMESPACES)


def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    # Return an {id: element} map of all the svg elements that have an 'id'
    # attribute, no matter where they are, including the root element itself:
    # https://github.com/fonttools/fonttools/issues/2548
    # If the same id occurs more than once, the last element in the XPath
    # result wins.
    return {el.attrib["id"]: el for el in xpath("//svg:*[@id]")(tree)}


def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    # Parse the content of a 'style' attribute into a {property: value} dict,
    # with surrounding whitespace stripped from both names and values.
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    #
    # Raises ValueError for any non-blank declaration that doesn't contain
    # exactly one ':' separator. Blank declarations (e.g. the one following a
    # trailing ';') are ignored.
    result = {}
    for declaration in style_attr.split(";"):
        if declaration.count(":") == 1:
            property_name, value = declaration.split(":")
            property_name = property_name.strip()
            result[property_name] = value.strip()
        elif declaration.strip():
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result


def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    # Yield all the ids that can be reached via references from this element tree.
    # We currently support xlink:href (as used by <use> and gradient templates),
    # and local url(#...) links found in fill or clip-path attributes, either
    # as plain attributes or as declarations inside a 'style' attribute.
    # The same id may be yielded more than once.
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Only a coarse pre-filter: the exact check is done in Python below on
        # the parsed declarations. We deliberately don't require the ':' to be
        # immediately followed by 'url(#', since parse_css_declarations()
        # tolerates whitespace there (e.g. style="fill: url(#grad)").
        "or contains(@style, 'url(#') "
        "]",
    )
    # The XPath only selects descendants, so the root is chained in explicitly.
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            # style declarations take precedence over same-named attributes
            attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])}
        for attr in ("fill", "clip-path"):
            if attr in attrs:
                value = attrs[attr]
                if value.startswith("url(#") and value.endswith(")"):
                    ref_id = value[5:-1]
                    # An empty target ("url(#)") references nothing; skip it the
                    # same way href_local_target() skips a bare "#", instead of
                    # failing: it's not our job to validate the svg.
                    if ref_id:
                        yield ref_id


def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    # Expand the initial subset of element ids to include ids that can be reached
    # via references from the initial set.
    # `element_ids` is modified in-place; nothing is returned.
    unvisited = element_ids
    while unvisited:
        referenced: Set[str] = set()
        for el_id in unvisited:
            if el_id not in elements:
                # ignore dangling reference; not our job to validate svg
                continue
            referenced.update(iter_referenced_ids(elements[el_id]))
        # only newly discovered ids need visiting in the next round; this also
        # guarantees termination in the presence of reference cycles
        referenced -= element_ids
        element_ids.update(referenced)
        unvisited = referenced


def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    # Keep elements if their id is in the subset, or any of their children's id is.
    # Drop elements whose id is not in the subset, and either have no children,
    # or all their children are being dropped.
    # Return True if `el` was kept, False if it was dropped. Dropped elements
    # are removed from their parent in-place (a parentless root is left alone).
    if el.attrib.get("id") in retained_ids:
        # if id is in the set, don't recurse; keep whole subtree
        return True
    # recursively subset all the children; we use a list comprehension instead
    # of a parentheses-less generator expression because we don't want any() to
    # short-circuit, as our function has a side effect of dropping empty elements.
    if any([subset_elements(e, retained_ids) for e in el]):
        return True
    # all the children (if any) have removed themselves by now
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False


def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    # Given {old_gid: new_gid} map, rename all elements whose id has the special
    # form "glyph{gid}" so that it reflects the new glyph index.
    # The 'id' attributes are updated in-place; the returned {old_id: new_id}
    # dict only contains the ids that have actually changed.
    elements = group_elements_by_id(svg)
    id_map = {}
    for el_id, el in elements.items():
        m = GID_RE.match(el_id)
        if not m:
            continue
        old_index = int(m.group(1))
        new_index = glyph_index_map.get(old_index)
        if new_index is not None:
            if old_index == new_index:
                continue
            new_id = f"glyph{new_index}"
        else:
            # If the old index is missing, the element corresponds to a glyph that
            # was excluded from the font's subset.
            # We rename it to avoid clashes with the new GIDs or other element ids.
            new_id = f".{el_id}"
            n = count(1)
            while new_id in elements:
                new_id = f"{new_id}.{next(n)}"

        id_map[el_id] = new_id
        el.attrib["id"] = new_id

    return id_map


def href_local_target(el: etree.Element) -> Optional[str]:
    # Return the id targeted by the element's xlink:href attribute when that is
    # a non-empty local reference ("#some-id"), without the leading '#'.
    # Return None if the attribute is missing, isn't local, or is a bare "#".
    if XLINK_HREF in el.attrib:
        href = el.attrib[XLINK_HREF]
        if href.startswith("#") and len(href) > 1:
            return href[1:]  # drop the leading #
    return None


def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    # update all xlink:href="#glyph..." attributes to point to the new glyph ids,
    # as per the {old_id: new_id} map returned by remap_glyph_ids().
    # Links whose target is not in `id_map` are left untouched.
    for el in xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")(svg):
        old_id = href_local_target(el)
        # guaranteed by the XPath predicate: href starts with '#glyph'
        assert old_id is not None
        if old_id in id_map:
            new_id = id_map[old_id]
            el.attrib[XLINK_HREF] = f"#{new_id}"


def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    # Yield sorted, non-overlapping (min, max) ranges of consecutive integers.
    # Both ends are inclusive; duplicates in the input are ignored, and an
    # empty input yields nothing.
    sorted_ints = iter(sorted(set(ints)))
    try:
        start = end = next(sorted_ints)
    except StopIteration:
        return
    for v in sorted_ints:
        if v - 1 == end:
            end = v
        else:
            yield (start, end)
            start = end = v
    yield (start, end)


@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    # Subset the 'SVG ' table in-place, keeping only the documents (and, within
    # each document, only the elements) needed by the glyphs in `s.glyphs`.
    #
    # `s` is the subsetter object; from it we read `orig_glyph_order`,
    # `reverseOrigGlyphMap`, `glyph_index_map`, `glyphs`, and the options
    # `retain_gids` and `pretty_svg`.
    #
    # Return True if any document is left (i.e. the table should be kept),
    # False otherwise. Raise ImportError if lxml is not installed.
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    glyph_order: List[str] = s.orig_glyph_order
    # map from glyph names to original glyph indices
    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    glyph_index_map: Dict[int, int] = s.glyph_index_map

    new_docs: List[SVGDocument] = []
    for doc in self.docList:
        glyphs = {
            glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
        }.intersection(s.glyphs)
        if not glyphs:
            # no intersection: we can drop the whole record
            continue

        svg = etree.fromstring(
            # encode because fromstring dislikes xml encoding decl if input is str.
            # SVG xml encoding must be utf-8 as per OT spec.
            doc.data.encode("utf-8"),
            parser=etree.XMLParser(
                # Disable libxml2 security restrictions to support very deep trees.
                # Without this we would get an error like this:
                # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
                # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
                huge_tree=True,
                # ignore blank text as it's not meaningful in OT-SVG; it also prevents
                # dangling tail text after removing an element when pretty_print=True
                remove_blank_text=True,
                # don't replace entities; we don't expect any in OT-SVG and they may
                # be abused for XXE attacks
                resolve_entities=False,
            ),
        )

        # Start from the "glyph{gid}" elements of the retained glyphs, then add
        # everything they reference, directly or indirectly.
        elements = group_elements_by_id(svg)
        gids = {rev_orig_glyph_map[g] for g in glyphs}
        element_ids = {f"glyph{i}" for i in gids}
        closure_element_ids(elements, element_ids)

        if not subset_elements(svg, element_ids):
            # nothing left in this document
            continue

        if not s.options.retain_gids:
            id_map = remap_glyph_ids(svg, glyph_index_map)
            update_glyph_href_links(svg, id_map)

        new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode("utf-8")

        # The new gids may no longer be contiguous: emit one record per run of
        # consecutive gids, each one sharing the same document text.
        new_gids = (glyph_index_map[i] for i in gids)
        for start, end in ranges(new_gids):
            new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))

    self.docList = new_docs

    return bool(self.docList)