1*e1fe3e4aSElliott Hughesfrom __future__ import annotations 2*e1fe3e4aSElliott Hughes 3*e1fe3e4aSElliott Hughesimport re 4*e1fe3e4aSElliott Hughesfrom functools import lru_cache 5*e1fe3e4aSElliott Hughesfrom itertools import chain, count 6*e1fe3e4aSElliott Hughesfrom typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple 7*e1fe3e4aSElliott Hughes 8*e1fe3e4aSElliott Hughestry: 9*e1fe3e4aSElliott Hughes from lxml import etree 10*e1fe3e4aSElliott Hughesexcept ImportError: 11*e1fe3e4aSElliott Hughes # lxml is required for subsetting SVG, but we prefer to delay the import error 12*e1fe3e4aSElliott Hughes # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table) 13*e1fe3e4aSElliott Hughes etree = None 14*e1fe3e4aSElliott Hughes 15*e1fe3e4aSElliott Hughesfrom fontTools import ttLib 16*e1fe3e4aSElliott Hughesfrom fontTools.subset.util import _add_method 17*e1fe3e4aSElliott Hughesfrom fontTools.ttLib.tables.S_V_G_ import SVGDocument 18*e1fe3e4aSElliott Hughes 19*e1fe3e4aSElliott Hughes 20*e1fe3e4aSElliott Hughes__all__ = ["subset_glyphs"] 21*e1fe3e4aSElliott Hughes 22*e1fe3e4aSElliott Hughes 23*e1fe3e4aSElliott HughesGID_RE = re.compile(r"^glyph(\d+)$") 24*e1fe3e4aSElliott Hughes 25*e1fe3e4aSElliott HughesNAMESPACES = { 26*e1fe3e4aSElliott Hughes "svg": "http://www.w3.org/2000/svg", 27*e1fe3e4aSElliott Hughes "xlink": "http://www.w3.org/1999/xlink", 28*e1fe3e4aSElliott Hughes} 29*e1fe3e4aSElliott HughesXLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href' 30*e1fe3e4aSElliott Hughes 31*e1fe3e4aSElliott Hughes 32*e1fe3e4aSElliott Hughes# TODO(antrotype): Replace with functools.cache once we are 3.9+ 33*e1fe3e4aSElliott Hughes@lru_cache(maxsize=None) 34*e1fe3e4aSElliott Hughesdef xpath(path): 35*e1fe3e4aSElliott Hughes # compile XPath upfront, caching result to reuse on multiple elements 36*e1fe3e4aSElliott Hughes return etree.XPath(path, namespaces=NAMESPACES) 37*e1fe3e4aSElliott Hughes 38*e1fe3e4aSElliott Hughes 39*e1fe3e4aSElliott Hughesdef group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]: 40*e1fe3e4aSElliott Hughes # select all svg elements with 'id' attribute no matter where they are 41*e1fe3e4aSElliott Hughes # including the root element itself: 42*e1fe3e4aSElliott Hughes # https://github.com/fonttools/fonttools/issues/2548 43*e1fe3e4aSElliott Hughes return {el.attrib["id"]: el for el in xpath("//svg:*[@id]")(tree)} 44*e1fe3e4aSElliott Hughes 45*e1fe3e4aSElliott Hughes 46*e1fe3e4aSElliott Hughesdef parse_css_declarations(style_attr: str) -> Dict[str, str]: 47*e1fe3e4aSElliott Hughes # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style 48*e1fe3e4aSElliott Hughes # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations 49*e1fe3e4aSElliott Hughes result = {} 50*e1fe3e4aSElliott Hughes for declaration in style_attr.split(";"): 51*e1fe3e4aSElliott Hughes if declaration.count(":") == 1: 52*e1fe3e4aSElliott Hughes property_name, value = declaration.split(":") 53*e1fe3e4aSElliott Hughes property_name = property_name.strip() 54*e1fe3e4aSElliott Hughes result[property_name] = value.strip() 55*e1fe3e4aSElliott Hughes elif declaration.strip(): 56*e1fe3e4aSElliott Hughes raise ValueError(f"Invalid CSS declaration syntax: {declaration}") 57*e1fe3e4aSElliott Hughes return result 58*e1fe3e4aSElliott Hughes 59*e1fe3e4aSElliott Hughes 60*e1fe3e4aSElliott Hughesdef iter_referenced_ids(tree: etree.Element) -> Iterator[str]: 61*e1fe3e4aSElliott Hughes # Yield all the ids that can be reached via references from this element tree. 62*e1fe3e4aSElliott Hughes # We currently support xlink:href (as used by <use> and gradient templates), 63*e1fe3e4aSElliott Hughes # and local url(#...) links found in fill or clip-path attributes 64*e1fe3e4aSElliott Hughes # TODO(anthrotype): Check we aren't missing other supported kinds of reference 65*e1fe3e4aSElliott Hughes find_svg_elements_with_references = xpath( 66*e1fe3e4aSElliott Hughes ".//svg:*[ " 67*e1fe3e4aSElliott Hughes "starts-with(@xlink:href, '#') " 68*e1fe3e4aSElliott Hughes "or starts-with(@fill, 'url(#') " 69*e1fe3e4aSElliott Hughes "or starts-with(@clip-path, 'url(#') " 70*e1fe3e4aSElliott Hughes "or contains(@style, ':url(#') " 71*e1fe3e4aSElliott Hughes "]", 72*e1fe3e4aSElliott Hughes ) 73*e1fe3e4aSElliott Hughes for el in chain([tree], find_svg_elements_with_references(tree)): 74*e1fe3e4aSElliott Hughes ref_id = href_local_target(el) 75*e1fe3e4aSElliott Hughes if ref_id is not None: 76*e1fe3e4aSElliott Hughes yield ref_id 77*e1fe3e4aSElliott Hughes 78*e1fe3e4aSElliott Hughes attrs = el.attrib 79*e1fe3e4aSElliott Hughes if "style" in attrs: 80*e1fe3e4aSElliott Hughes attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])} 81*e1fe3e4aSElliott Hughes for attr in ("fill", "clip-path"): 82*e1fe3e4aSElliott Hughes if attr in attrs: 83*e1fe3e4aSElliott Hughes value = attrs[attr] 84*e1fe3e4aSElliott Hughes if value.startswith("url(#") and value.endswith(")"): 85*e1fe3e4aSElliott Hughes ref_id = value[5:-1] 86*e1fe3e4aSElliott Hughes assert ref_id 87*e1fe3e4aSElliott Hughes yield ref_id 88*e1fe3e4aSElliott Hughes 89*e1fe3e4aSElliott Hughes 90*e1fe3e4aSElliott Hughesdef closure_element_ids( 91*e1fe3e4aSElliott Hughes elements: Dict[str, etree.Element], element_ids: Set[str] 92*e1fe3e4aSElliott Hughes) -> None: 93*e1fe3e4aSElliott Hughes # Expand the initial subset of element ids to include ids that can be reached 94*e1fe3e4aSElliott Hughes # via references from the initial set. 95*e1fe3e4aSElliott Hughes unvisited = element_ids 96*e1fe3e4aSElliott Hughes while unvisited: 97*e1fe3e4aSElliott Hughes referenced: Set[str] = set() 98*e1fe3e4aSElliott Hughes for el_id in unvisited: 99*e1fe3e4aSElliott Hughes if el_id not in elements: 100*e1fe3e4aSElliott Hughes # ignore dangling reference; not our job to validate svg 101*e1fe3e4aSElliott Hughes continue 102*e1fe3e4aSElliott Hughes referenced.update(iter_referenced_ids(elements[el_id])) 103*e1fe3e4aSElliott Hughes referenced -= element_ids 104*e1fe3e4aSElliott Hughes element_ids.update(referenced) 105*e1fe3e4aSElliott Hughes unvisited = referenced 106*e1fe3e4aSElliott Hughes 107*e1fe3e4aSElliott Hughes 108*e1fe3e4aSElliott Hughesdef subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool: 109*e1fe3e4aSElliott Hughes # Keep elements if their id is in the subset, or any of their children's id is. 110*e1fe3e4aSElliott Hughes # Drop elements whose id is not in the subset, and either have no children, 111*e1fe3e4aSElliott Hughes # or all their children are being dropped. 112*e1fe3e4aSElliott Hughes if el.attrib.get("id") in retained_ids: 113*e1fe3e4aSElliott Hughes # if id is in the set, don't recurse; keep whole subtree 114*e1fe3e4aSElliott Hughes return True 115*e1fe3e4aSElliott Hughes # recursively subset all the children; we use a list comprehension instead 116*e1fe3e4aSElliott Hughes # of a parentheses-less generator expression because we don't want any() to 117*e1fe3e4aSElliott Hughes # short-circuit, as our function has a side effect of dropping empty elements. 118*e1fe3e4aSElliott Hughes if any([subset_elements(e, retained_ids) for e in el]): 119*e1fe3e4aSElliott Hughes return True 120*e1fe3e4aSElliott Hughes assert len(el) == 0 121*e1fe3e4aSElliott Hughes parent = el.getparent() 122*e1fe3e4aSElliott Hughes if parent is not None: 123*e1fe3e4aSElliott Hughes parent.remove(el) 124*e1fe3e4aSElliott Hughes return False 125*e1fe3e4aSElliott Hughes 126*e1fe3e4aSElliott Hughes 127*e1fe3e4aSElliott Hughesdef remap_glyph_ids( 128*e1fe3e4aSElliott Hughes svg: etree.Element, glyph_index_map: Dict[int, int] 129*e1fe3e4aSElliott Hughes) -> Dict[str, str]: 130*e1fe3e4aSElliott Hughes # Given {old_gid: new_gid} map, rename all elements containing id="glyph{gid}" 131*e1fe3e4aSElliott Hughes # special attributes 132*e1fe3e4aSElliott Hughes elements = group_elements_by_id(svg) 133*e1fe3e4aSElliott Hughes id_map = {} 134*e1fe3e4aSElliott Hughes for el_id, el in elements.items(): 135*e1fe3e4aSElliott Hughes m = GID_RE.match(el_id) 136*e1fe3e4aSElliott Hughes if not m: 137*e1fe3e4aSElliott Hughes continue 138*e1fe3e4aSElliott Hughes old_index = int(m.group(1)) 139*e1fe3e4aSElliott Hughes new_index = glyph_index_map.get(old_index) 140*e1fe3e4aSElliott Hughes if new_index is not None: 141*e1fe3e4aSElliott Hughes if old_index == new_index: 142*e1fe3e4aSElliott Hughes continue 143*e1fe3e4aSElliott Hughes new_id = f"glyph{new_index}" 144*e1fe3e4aSElliott Hughes else: 145*e1fe3e4aSElliott Hughes # If the old index is missing, the element correspond to a glyph that was 146*e1fe3e4aSElliott Hughes # excluded from the font's subset. 147*e1fe3e4aSElliott Hughes # We rename it to avoid clashes with the new GIDs or other element ids. 148*e1fe3e4aSElliott Hughes new_id = f".{el_id}" 149*e1fe3e4aSElliott Hughes n = count(1) 150*e1fe3e4aSElliott Hughes while new_id in elements: 151*e1fe3e4aSElliott Hughes new_id = f"{new_id}.{next(n)}" 152*e1fe3e4aSElliott Hughes 153*e1fe3e4aSElliott Hughes id_map[el_id] = new_id 154*e1fe3e4aSElliott Hughes el.attrib["id"] = new_id 155*e1fe3e4aSElliott Hughes 156*e1fe3e4aSElliott Hughes return id_map 157*e1fe3e4aSElliott Hughes 158*e1fe3e4aSElliott Hughes 159*e1fe3e4aSElliott Hughesdef href_local_target(el: etree.Element) -> Optional[str]: 160*e1fe3e4aSElliott Hughes if XLINK_HREF in el.attrib: 161*e1fe3e4aSElliott Hughes href = el.attrib[XLINK_HREF] 162*e1fe3e4aSElliott Hughes if href.startswith("#") and len(href) > 1: 163*e1fe3e4aSElliott Hughes return href[1:] # drop the leading # 164*e1fe3e4aSElliott Hughes return None 165*e1fe3e4aSElliott Hughes 166*e1fe3e4aSElliott Hughes 167*e1fe3e4aSElliott Hughesdef update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None: 168*e1fe3e4aSElliott Hughes # update all xlink:href="#glyph..." attributes to point to the new glyph ids 169*e1fe3e4aSElliott Hughes for el in xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")(svg): 170*e1fe3e4aSElliott Hughes old_id = href_local_target(el) 171*e1fe3e4aSElliott Hughes assert old_id is not None 172*e1fe3e4aSElliott Hughes if old_id in id_map: 173*e1fe3e4aSElliott Hughes new_id = id_map[old_id] 174*e1fe3e4aSElliott Hughes el.attrib[XLINK_HREF] = f"#{new_id}" 175*e1fe3e4aSElliott Hughes 176*e1fe3e4aSElliott Hughes 177*e1fe3e4aSElliott Hughesdef ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]: 178*e1fe3e4aSElliott Hughes # Yield sorted, non-overlapping (min, max) ranges of consecutive integers 179*e1fe3e4aSElliott Hughes sorted_ints = iter(sorted(set(ints))) 180*e1fe3e4aSElliott Hughes try: 181*e1fe3e4aSElliott Hughes start = end = next(sorted_ints) 182*e1fe3e4aSElliott Hughes except StopIteration: 183*e1fe3e4aSElliott Hughes return 184*e1fe3e4aSElliott Hughes for v in sorted_ints: 185*e1fe3e4aSElliott Hughes if v - 1 == end: 186*e1fe3e4aSElliott Hughes end = v 187*e1fe3e4aSElliott Hughes else: 188*e1fe3e4aSElliott Hughes yield (start, end) 189*e1fe3e4aSElliott Hughes start = end = v 190*e1fe3e4aSElliott Hughes yield (start, end) 191*e1fe3e4aSElliott Hughes 192*e1fe3e4aSElliott Hughes 193*e1fe3e4aSElliott Hughes@_add_method(ttLib.getTableClass("SVG ")) 194*e1fe3e4aSElliott Hughesdef subset_glyphs(self, s) -> bool: 195*e1fe3e4aSElliott Hughes if etree is None: 196*e1fe3e4aSElliott Hughes raise ImportError("No module named 'lxml', required to subset SVG") 197*e1fe3e4aSElliott Hughes 198*e1fe3e4aSElliott Hughes # glyph names (before subsetting) 199*e1fe3e4aSElliott Hughes glyph_order: List[str] = s.orig_glyph_order 200*e1fe3e4aSElliott Hughes # map from glyph names to original glyph indices 201*e1fe3e4aSElliott Hughes rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap 202*e1fe3e4aSElliott Hughes # map from original to new glyph indices (after subsetting) 203*e1fe3e4aSElliott Hughes glyph_index_map: Dict[int, int] = s.glyph_index_map 204*e1fe3e4aSElliott Hughes 205*e1fe3e4aSElliott Hughes new_docs: List[SVGDocument] = [] 206*e1fe3e4aSElliott Hughes for doc in self.docList: 207*e1fe3e4aSElliott Hughes glyphs = { 208*e1fe3e4aSElliott Hughes glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1) 209*e1fe3e4aSElliott Hughes }.intersection(s.glyphs) 210*e1fe3e4aSElliott Hughes if not glyphs: 211*e1fe3e4aSElliott Hughes # no intersection: we can drop the whole record 212*e1fe3e4aSElliott Hughes continue 213*e1fe3e4aSElliott Hughes 214*e1fe3e4aSElliott Hughes svg = etree.fromstring( 215*e1fe3e4aSElliott Hughes # encode because fromstring dislikes xml encoding decl if input is str. 216*e1fe3e4aSElliott Hughes # SVG xml encoding must be utf-8 as per OT spec. 217*e1fe3e4aSElliott Hughes doc.data.encode("utf-8"), 218*e1fe3e4aSElliott Hughes parser=etree.XMLParser( 219*e1fe3e4aSElliott Hughes # Disable libxml2 security restrictions to support very deep trees. 220*e1fe3e4aSElliott Hughes # Without this we would get an error like this: 221*e1fe3e4aSElliott Hughes # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup` 222*e1fe3e4aSElliott Hughes # when parsing big fonts e.g. noto-emoji-picosvg.ttf. 223*e1fe3e4aSElliott Hughes huge_tree=True, 224*e1fe3e4aSElliott Hughes # ignore blank text as it's not meaningful in OT-SVG; it also prevents 225*e1fe3e4aSElliott Hughes # dangling tail text after removing an element when pretty_print=True 226*e1fe3e4aSElliott Hughes remove_blank_text=True, 227*e1fe3e4aSElliott Hughes # don't replace entities; we don't expect any in OT-SVG and they may 228*e1fe3e4aSElliott Hughes # be abused for XXE attacks 229*e1fe3e4aSElliott Hughes resolve_entities=False, 230*e1fe3e4aSElliott Hughes ), 231*e1fe3e4aSElliott Hughes ) 232*e1fe3e4aSElliott Hughes 233*e1fe3e4aSElliott Hughes elements = group_elements_by_id(svg) 234*e1fe3e4aSElliott Hughes gids = {rev_orig_glyph_map[g] for g in glyphs} 235*e1fe3e4aSElliott Hughes element_ids = {f"glyph{i}" for i in gids} 236*e1fe3e4aSElliott Hughes closure_element_ids(elements, element_ids) 237*e1fe3e4aSElliott Hughes 238*e1fe3e4aSElliott Hughes if not subset_elements(svg, element_ids): 239*e1fe3e4aSElliott Hughes continue 240*e1fe3e4aSElliott Hughes 241*e1fe3e4aSElliott Hughes if not s.options.retain_gids: 242*e1fe3e4aSElliott Hughes id_map = remap_glyph_ids(svg, glyph_index_map) 243*e1fe3e4aSElliott Hughes update_glyph_href_links(svg, id_map) 244*e1fe3e4aSElliott Hughes 245*e1fe3e4aSElliott Hughes new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode("utf-8") 246*e1fe3e4aSElliott Hughes 247*e1fe3e4aSElliott Hughes new_gids = (glyph_index_map[i] for i in gids) 248*e1fe3e4aSElliott Hughes for start, end in ranges(new_gids): 249*e1fe3e4aSElliott Hughes new_docs.append(SVGDocument(new_doc, start, end, doc.compressed)) 250*e1fe3e4aSElliott Hughes 251*e1fe3e4aSElliott Hughes self.docList = new_docs 252*e1fe3e4aSElliott Hughes 253*e1fe3e4aSElliott Hughes return bool(self.docList) 254