xref: /aosp_15_r20/external/fonttools/Lib/fontTools/subset/svg.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1*e1fe3e4aSElliott Hughesfrom __future__ import annotations
2*e1fe3e4aSElliott Hughes
3*e1fe3e4aSElliott Hughesimport re
4*e1fe3e4aSElliott Hughesfrom functools import lru_cache
5*e1fe3e4aSElliott Hughesfrom itertools import chain, count
6*e1fe3e4aSElliott Hughesfrom typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
7*e1fe3e4aSElliott Hughes
8*e1fe3e4aSElliott Hughestry:
9*e1fe3e4aSElliott Hughes    from lxml import etree
10*e1fe3e4aSElliott Hughesexcept ImportError:
11*e1fe3e4aSElliott Hughes    # lxml is required for subsetting SVG, but we prefer to delay the import error
12*e1fe3e4aSElliott Hughes    # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
13*e1fe3e4aSElliott Hughes    etree = None
14*e1fe3e4aSElliott Hughes
15*e1fe3e4aSElliott Hughesfrom fontTools import ttLib
16*e1fe3e4aSElliott Hughesfrom fontTools.subset.util import _add_method
17*e1fe3e4aSElliott Hughesfrom fontTools.ttLib.tables.S_V_G_ import SVGDocument
18*e1fe3e4aSElliott Hughes
19*e1fe3e4aSElliott Hughes
__all__ = ["subset_glyphs"]


# Matches element ids of the exact form "glyph<decimal digits>"; group 1 captures
# the digits (the glyph index).
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI bindings handed to the XPath expressions compiled by
# xpath(), so queries can use the "svg:" and "xlink:" prefixes.
NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}
# Name of the xlink:href attribute written as "{namespace-uri}href", used as the
# key when reading or writing the attribute through an element's `attrib` mapping.
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'
30*e1fe3e4aSElliott Hughes
31*e1fe3e4aSElliott Hughes
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    """Return a compiled XPath evaluator for the expression *path*.

    The expression is compiled with the module's NAMESPACES prefix bindings.
    Results are memoized per expression string, so each distinct expression is
    compiled once and the same evaluator is reused across elements.
    """
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
37*e1fe3e4aSElliott Hughes
38*e1fe3e4aSElliott Hughes
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    """Map every ``id`` attribute value found in *tree* to its SVG element.

    The query selects SVG-namespace elements carrying an ``id`` wherever they
    are located, the root element itself included; see
    https://github.com/fonttools/fonttools/issues/2548

    If the same id occurs more than once, the last matching element wins.
    """
    find_elements_with_id = xpath("//svg:*[@id]")
    by_id = {}
    for element in find_elements_with_id(tree):
        by_id[element.attrib["id"]] = element
    return by_id
44*e1fe3e4aSElliott Hughes
45*e1fe3e4aSElliott Hughes
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    """Parse the content of an SVG ``style`` attribute into a dict.

    https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations

    Args:
        style_attr: semicolon-separated ``property: value`` declarations.

    Returns:
        A ``{property_name: value}`` dict with surrounding whitespace stripped
        from both names and values. Blank declarations (e.g. produced by a
        trailing ';') are skipped; a repeated property keeps its last value.

    Raises:
        ValueError: if a non-blank declaration contains no ':' separator.
    """
    result = {}
    for declaration in style_attr.split(";"):
        # Split on the FIRST colon only: the property name can never contain
        # one, but the value legitimately can (e.g. 'url(http://host/x.svg#a)'),
        # and such declarations must not be rejected as malformed.
        property_name, separator, value = declaration.partition(":")
        if separator:
            result[property_name.strip()] = value.strip()
        elif declaration.strip():
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result
58*e1fe3e4aSElliott Hughes
59*e1fe3e4aSElliott Hughes
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    """Yield the ids referenced by *tree* itself or by any SVG element below it.

    Two kinds of local reference are recognized:

    - ``xlink:href="#id"`` (as used by <use> and gradient templates);
    - ``url(#id)`` values of ``fill`` or ``clip-path``, given either as
      presentation attributes or as declarations inside a ``style`` attribute
      (a ``style`` declaration overrides the attribute of the same name).

    The same id may be yielded more than once. References with an empty target
    (``url(#)``) are skipped rather than reported.

    Raises:
        ValueError: propagated from parse_css_declarations() when an inspected
            element has a malformed ``style`` attribute.
    """
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Deliberately no leading ':' here: 'fill: url(#a)' (whitespace after the
        # colon) is valid and is accepted by parse_css_declarations(), so the
        # pre-filter must not be stricter than the parser that follows it.
        "or contains(@style, 'url(#') "
        "]",
    )
    # The XPath only selects descendants, so the root element is chained in
    # explicitly to have its own references inspected too.
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            # style declarations take precedence over same-named attributes
            attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])}
        for attr in ("fill", "clip-path"):
            if attr not in attrs:
                continue
            value = attrs[attr]
            if value.startswith("url(#") and value.endswith(")"):
                ref_id = value[5:-1]  # text between 'url(#' and ')'
                # An empty target cannot name any element; skip it instead of
                # asserting, since validating the SVG is not this module's job
                # and asserts are stripped under `python -O`.
                if ref_id:
                    yield ref_id
88*e1fe3e4aSElliott Hughes
89*e1fe3e4aSElliott Hughes
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    """Extend *element_ids* in place with every id reachable through references.

    Starting from the initial ids, the elements they name are scanned for
    references; newly discovered ids are added and scanned in turn, until a
    pass finds nothing new. Ids that do not name any element in *elements* are
    kept in the set but not followed.
    """
    frontier = element_ids
    while frontier:
        discovered: Set[str] = set()
        for el_id in frontier:
            # dangling references are ignored; validating the svg is not our job
            if el_id in elements:
                discovered.update(iter_referenced_ids(elements[el_id]))
        # only ids not seen before need to be visited in the next pass
        discovered.difference_update(element_ids)
        element_ids |= discovered
        frontier = discovered
106*e1fe3e4aSElliott Hughes
107*e1fe3e4aSElliott Hughes
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    """Prune *el* in place, keeping only what *retained_ids* requires.

    An element is kept when its own id is retained (its whole subtree is then
    kept untouched), or when at least one of its children is kept. Otherwise
    it is detached from its parent, if it has one.

    Returns True if *el* was kept, False if it was dropped.
    """
    if el.attrib.get("id") in retained_ids:
        # retained by id: keep the entire subtree without descending into it
        return True
    # Build the full list of results before handing it to any(): a generator
    # would let any() short-circuit, and every child must be visited because
    # the recursive call has the side effect of dropping empty elements.
    child_results = [subset_elements(child, retained_ids) for child in el]
    if not any(child_results):
        # every child (if any) has removed itself by now
        assert len(el) == 0
        parent = el.getparent()
        if parent is not None:
            parent.remove(el)
        return False
    return True
125*e1fe3e4aSElliott Hughes
126*e1fe3e4aSElliott Hughes
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    """Rename the ``id="glyph{gid}"`` elements of *svg* per *glyph_index_map*.

    *glyph_index_map* maps old glyph indices to new ones. Elements whose index
    is unchanged are left alone. Elements whose old index is absent from the
    map get a '.'-prefixed id that does not collide with any id originally
    present in the document.

    Returns the ``{old_id: new_id}`` map of the ids that were actually changed.
    """
    elements = group_elements_by_id(svg)
    id_map = {}
    for old_id, element in elements.items():
        match = GID_RE.match(old_id)
        if match is None:
            # not a glyph id; nothing to renumber
            continue
        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid is None:
            # The glyph was excluded from the font's subset: move the element
            # out of the "glyph{gid}" namespace so that it cannot clash with the
            # new GIDs, appending numeric suffixes until the id is unused.
            renamed = f".{old_id}"
            suffixes = count(1)
            while renamed in elements:
                renamed = f"{renamed}.{next(suffixes)}"
        elif new_gid == old_gid:
            continue
        else:
            renamed = f"glyph{new_gid}"

        id_map[old_id] = renamed
        element.attrib["id"] = renamed

    return id_map
157*e1fe3e4aSElliott Hughes
158*e1fe3e4aSElliott Hughes
def href_local_target(el: etree.Element) -> Optional[str]:
    """Return the id targeted by the ``xlink:href`` of *el*, if it is local.

    A local link has the form ``#<id>`` with a non-empty id. None is returned
    when the attribute is missing, does not start with '#', or is a bare '#'.
    """
    if XLINK_HREF not in el.attrib:
        return None
    href = el.attrib[XLINK_HREF]
    # whatever follows the leading '#'; empty for a bare '#' or a non-local link
    target = href[1:] if href.startswith("#") else ""
    return target or None
165*e1fe3e4aSElliott Hughes
166*e1fe3e4aSElliott Hughes
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    """Rewrite ``xlink:href="#glyph..."`` links below *svg* to the renamed ids.

    *id_map* maps old element ids to new ones; links whose target is not a key
    of the map are left unchanged.
    """
    find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for link in find_glyph_links(svg):
        old_id = href_local_target(link)
        # the XPath guarantees a '#glyph...' href, hence a non-empty local target
        assert old_id is not None
        if old_id not in id_map:
            continue
        link.attrib[XLINK_HREF] = f"#{id_map[old_id]}"
175*e1fe3e4aSElliott Hughes
176*e1fe3e4aSElliott Hughes
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    """Yield sorted, non-overlapping (min, max) ranges of consecutive integers.

    Duplicates in *ints* are ignored and the input order is irrelevant. Both
    ends of each range are inclusive. An empty input yields nothing.
    """
    ordered = sorted(set(ints))
    if not ordered:
        return
    low = high = ordered[0]
    for value in ordered[1:]:
        if value == high + 1:
            # still consecutive: extend the current run
            high = value
            continue
        yield (low, high)
        low = high = value
    yield (low, high)
191*e1fe3e4aSElliott Hughes
192*e1fe3e4aSElliott Hughes
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    """Subset the documents of this 'SVG ' table to the glyphs kept by *s*.

    Every document whose glyph range intersects ``s.glyphs`` is parsed,
    reduced to the elements of the kept glyphs plus whatever they reference,
    and serialized again. Unless ``s.options.retain_gids`` is set, the
    ``glyph{gid}`` ids and the ``xlink:href`` links pointing to them are
    renumbered to the new glyph indices. One SVGDocument record is emitted
    for each run of consecutive new glyph indices.

    Returns True if at least one document is left in ``self.docList``.

    Raises ImportError if lxml is not available.
    """
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    orig_glyph_names: List[str] = s.orig_glyph_order
    # glyph name -> original glyph index
    name_to_orig_gid: Dict[str, int] = s.reverseOrigGlyphMap
    # original glyph index -> new glyph index (after subsetting)
    orig_to_new_gid: Dict[int, int] = s.glyph_index_map

    subset_docs: List[SVGDocument] = []
    for doc in self.docList:
        doc_glyphs = {
            orig_glyph_names[gid]
            for gid in range(doc.startGlyphID, doc.endGlyphID + 1)
        }
        kept_glyphs = doc_glyphs.intersection(s.glyphs)
        if not kept_glyphs:
            # no glyph of this record survives: drop the record altogether
            continue

        # Parse from bytes because fromstring dislikes an xml encoding
        # declaration when given str; the OT spec mandates utf-8 for SVG.
        xml_bytes = doc.data.encode("utf-8")
        parser = etree.XMLParser(
            # Lift libxml2's security limits so that very deep trees can be
            # parsed; big fonts such as noto-emoji-picosvg.ttf otherwise fail with
            # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`.
            huge_tree=True,
            # Blank text carries no meaning in OT-SVG; dropping it also avoids
            # dangling tail text after removing elements when pretty-printing.
            remove_blank_text=True,
            # Entities are not expected in OT-SVG and may be abused for XXE
            # attacks, so they are never expanded.
            resolve_entities=False,
        )
        svg = etree.fromstring(xml_bytes, parser=parser)

        elements = group_elements_by_id(svg)
        kept_gids = {name_to_orig_gid[name] for name in kept_glyphs}
        kept_ids = {f"glyph{gid}" for gid in kept_gids}
        closure_element_ids(elements, kept_ids)

        if not subset_elements(svg, kept_ids):
            # nothing at all was retained in this document
            continue

        if not s.options.retain_gids:
            renamed_ids = remap_glyph_ids(svg, orig_to_new_gid)
            update_glyph_href_links(svg, renamed_ids)

        serialized = etree.tostring(svg, pretty_print=s.options.pretty_svg)
        new_doc = serialized.decode("utf-8")

        # the same document text is shared by each run of consecutive new gids
        subset_docs.extend(
            SVGDocument(new_doc, start, end, doc.compressed)
            for start, end in ranges(orig_to_new_gid[gid] for gid in kept_gids)
        )

    self.docList = subset_docs

    return bool(self.docList)
254