from fontTools.misc.roundTools import noRound, otRound
from fontTools.misc.intTools import bit_count
from fontTools.ttLib.tables import otTables as ot
from fontTools.varLib.models import supportScalar
from fontTools.varLib.builder import (
    buildVarRegionList,
    buildVarStore,
    buildVarRegion,
    buildVarData,
)
from functools import partial
from collections import defaultdict
from heapq import heappush, heappop


NO_VARIATION_INDEX = ot.NO_VARIATION_INDEX
ot.VarStore.NO_VARIATION_INDEX = NO_VARIATION_INDEX


def _getLocationKey(loc):
    return tuple(sorted(loc.items(), key=lambda kv: kv[0]))


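# Illustrative sketch (not executed): _getLocationKey turns a support/region
# dict into a hashable, order-independent key, e.g.
#
#   _getLocationKey({"wght": (0.0, 1.0, 1.0), "wdth": (0.0, 0.5, 1.0)})
#   # -> (("wdth", (0.0, 0.5, 1.0)), ("wght", (0.0, 1.0, 1.0)))

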
class OnlineVarStoreBuilder(object):
    def __init__(self, axisTags):
        self._axisTags = axisTags
        self._regionMap = {}
        self._regionList = buildVarRegionList([], axisTags)
        self._store = buildVarStore(self._regionList, [])
        self._data = None
        self._model = None
        self._supports = None
        self._varDataIndices = {}
        self._varDataCaches = {}
        self._cache = {}

    def setModel(self, model):
        self.setSupports(model.supports)
        self._model = model

    def setSupports(self, supports):
        self._model = None
        self._supports = list(supports)
        if not self._supports[0]:
            del self._supports[0]  # Drop base master support
        self._cache = {}
        self._data = None

    def finish(self, optimize=True):
        self._regionList.RegionCount = len(self._regionList.Region)
        self._store.VarDataCount = len(self._store.VarData)
        for data in self._store.VarData:
            data.ItemCount = len(data.Item)
            data.calculateNumShorts(optimize=optimize)
        return self._store

    def _add_VarData(self):
        regionMap = self._regionMap
        regionList = self._regionList

        regions = self._supports
        regionIndices = []
        for region in regions:
            key = _getLocationKey(region)
            idx = regionMap.get(key)
            if idx is None:
                varRegion = buildVarRegion(region, self._axisTags)
                idx = regionMap[key] = len(regionList.Region)
                regionList.Region.append(varRegion)
            regionIndices.append(idx)

        # Check if we have one already...
        key = tuple(regionIndices)
        varDataIdx = self._varDataIndices.get(key)
        if varDataIdx is not None:
            self._outer = varDataIdx
            self._data = self._store.VarData[varDataIdx]
            self._cache = self._varDataCaches[key]
            if len(self._data.Item) == 0xFFFF:
                # This is full.  Need new one.
                varDataIdx = None

        if varDataIdx is None:
            self._data = buildVarData(regionIndices, [], optimize=False)
            self._outer = len(self._store.VarData)
            self._store.VarData.append(self._data)
            self._varDataIndices[key] = self._outer
            if key not in self._varDataCaches:
                self._varDataCaches[key] = {}
            self._cache = self._varDataCaches[key]

    def storeMasters(self, master_values, *, round=round):
        deltas = self._model.getDeltas(master_values, round=round)
        base = deltas.pop(0)
        return base, self.storeDeltas(deltas, round=noRound)

    def storeDeltas(self, deltas, *, round=round):
        deltas = [round(d) for d in deltas]
        if len(deltas) == len(self._supports) + 1:
            # The first delta is the base master (default) value; the
            # VarStore stores deltas only for the non-base supports.
            deltas = tuple(deltas[1:])
        else:
            assert len(deltas) == len(self._supports)
            deltas = tuple(deltas)

        varIdx = self._cache.get(deltas)
        if varIdx is not None:
            return varIdx

        if not self._data:
            self._add_VarData()
        inner = len(self._data.Item)
        if inner == 0xFFFF:
            # Full array. Start a new one; deltas are already rounded
            # at this point, so don't round again on the recursive call.
            self._add_VarData()
            return self.storeDeltas(deltas, round=noRound)
        self._data.addItem(deltas, round=noRound)

        varIdx = (self._outer << 16) + inner
        self._cache[deltas] = varIdx
        return varIdx


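# Illustrative usage sketch (not executed); the axis, model, and values below
# are only an example, built with fontTools.varLib.models.VariationModel:
#
#   from fontTools.varLib.models import VariationModel
#
#   builder = OnlineVarStoreBuilder(["wght"])
#   builder.setModel(VariationModel([{}, {"wght": 1.0}]))
#   base, varIdx = builder.storeMasters([100, 150])  # base=100, delta row (50,)
#   store = builder.finish()  # ot.VarStore; varIdx == (outer << 16) + inner

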
def VarData_addItem(self, deltas, *, round=round):
    deltas = [round(d) for d in deltas]

    countUs = self.VarRegionCount
    countThem = len(deltas)
    if countUs + 1 == countThem:
        deltas = list(deltas[1:])
    else:
        assert countUs == countThem, (countUs, countThem)
        deltas = list(deltas)
    self.Item.append(deltas)
    self.ItemCount = len(self.Item)


ot.VarData.addItem = VarData_addItem


def VarRegion_get_support(self, fvar_axes):
    return {
        fvar_axes[i].axisTag: (reg.StartCoord, reg.PeakCoord, reg.EndCoord)
        for i, reg in enumerate(self.VarRegionAxis)
        if reg.PeakCoord != 0
    }


ot.VarRegion.get_support = VarRegion_get_support


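# Illustrative sketch (not executed): with fvar axes ["wght", "wdth"] and a
# region peaking at wght=1.0 while leaving wdth at its default,
#
#   region.get_support(fvar_axes)  # -> {"wght": (0.0, 1.0, 1.0)}
#
# Axes whose PeakCoord is 0 do not constrain the region and are omitted.

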
def VarStore___bool__(self):
    return bool(self.VarData)


ot.VarStore.__bool__ = VarStore___bool__


class VarStoreInstancer(object):
    def __init__(self, varstore, fvar_axes, location={}):
        self.fvar_axes = fvar_axes
        assert varstore is None or varstore.Format == 1
        self._varData = varstore.VarData if varstore else []
        self._regions = varstore.VarRegionList.Region if varstore else []
        self.setLocation(location)

    def setLocation(self, location):
        self.location = dict(location)
        self._clearCaches()

    def _clearCaches(self):
        self._scalars = {}

    def _getScalar(self, regionIdx):
        scalar = self._scalars.get(regionIdx)
        if scalar is None:
            support = self._regions[regionIdx].get_support(self.fvar_axes)
            scalar = supportScalar(self.location, support)
            self._scalars[regionIdx] = scalar
        return scalar

    @staticmethod
    def interpolateFromDeltasAndScalars(deltas, scalars):
        delta = 0.0
        for d, s in zip(deltas, scalars):
            if not s:
                continue
            delta += d * s
        return delta

    def __getitem__(self, varidx):
        major, minor = varidx >> 16, varidx & 0xFFFF
        if varidx == NO_VARIATION_INDEX:
            return 0.0
        varData = self._varData
        scalars = [self._getScalar(ri) for ri in varData[major].VarRegionIndex]
        deltas = varData[major].Item[minor]
        return self.interpolateFromDeltasAndScalars(deltas, scalars)

    def interpolateFromDeltas(self, varDataIndex, deltas):
        varData = self._varData
        scalars = [self._getScalar(ri) for ri in varData[varDataIndex].VarRegionIndex]
        return self.interpolateFromDeltasAndScalars(deltas, scalars)


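# Illustrative usage sketch (not executed); assumes a font with an fvar table
# and a GDEF ItemVariationStore:
#
#   instancer = VarStoreInstancer(
#       font["GDEF"].table.VarStore, font["fvar"].axes, {"wght": 0.5}
#   )
#   delta = instancer[varIdx]  # scalar-weighted sum of the row's deltas

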
#
# Optimizations
#
# retainFirstMap - If true, major 0 mappings are retained. Deltas for unused indices are zeroed
# advIdxes - Set of major 0 indices for advance deltas to be listed first. Other major 0 indices follow.


def VarStore_subset_varidxes(
    self, varIdxes, optimize=True, retainFirstMap=False, advIdxes=set()
):
    # Sort out used varIdxes by major/minor.
    used = {}
    for varIdx in varIdxes:
        if varIdx == NO_VARIATION_INDEX:
            continue
        major = varIdx >> 16
        minor = varIdx & 0xFFFF
        d = used.get(major)
        if d is None:
            d = used[major] = set()
        d.add(minor)
    del varIdxes

    #
    # Subset VarData
    #

    varData = self.VarData
    newVarData = []
    varDataMap = {NO_VARIATION_INDEX: NO_VARIATION_INDEX}
    for major, data in enumerate(varData):
        usedMinors = used.get(major)
        if usedMinors is None:
            continue
        newMajor = len(newVarData)
        newVarData.append(data)

        items = data.Item
        newItems = []
        if major == 0 and retainFirstMap:
            for minor in range(len(items)):
                newItems.append(
                    items[minor] if minor in usedMinors else [0] * len(items[minor])
                )
                varDataMap[minor] = minor
        else:
            if major == 0:
                minors = sorted(advIdxes) + sorted(usedMinors - advIdxes)
            else:
                minors = sorted(usedMinors)
            for minor in minors:
                newMinor = len(newItems)
                newItems.append(items[minor])
                varDataMap[(major << 16) + minor] = (newMajor << 16) + newMinor

        data.Item = newItems
        data.ItemCount = len(data.Item)

        data.calculateNumShorts(optimize=optimize)

    self.VarData = newVarData
    self.VarDataCount = len(self.VarData)

    self.prune_regions()

    return varDataMap


ot.VarStore.subset_varidxes = VarStore_subset_varidxes


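# Illustrative usage sketch (not executed); varIdxes is a hypothetical set of
# still-referenced variation indices collected by the caller:
#
#   varidx_map = store.subset_varidxes(varIdxes)
#   font["GDEF"].table.remap_device_varidxes(varidx_map)

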
def VarStore_prune_regions(self):
    """Remove unused VarRegions."""
    #
    # Subset VarRegionList
    #

    # Collect.
    usedRegions = set()
    for data in self.VarData:
        usedRegions.update(data.VarRegionIndex)
    # Subset.
    regionList = self.VarRegionList
    regions = regionList.Region
    newRegions = []
    regionMap = {}
    for i in sorted(usedRegions):
        regionMap[i] = len(newRegions)
        newRegions.append(regions[i])
    regionList.Region = newRegions
    regionList.RegionCount = len(regionList.Region)
    # Map.
    for data in self.VarData:
        data.VarRegionIndex = [regionMap[i] for i in data.VarRegionIndex]


ot.VarStore.prune_regions = VarStore_prune_regions


def _visit(self, func):
    """Recurse down from self; if an object's type is ot.Device,
    call func() on it.  Works on otData-style classes."""

    if type(self) == ot.Device:
        func(self)

    elif isinstance(self, list):
        for that in self:
            _visit(that, func)

    elif hasattr(self, "getConverters") and not hasattr(self, "postRead"):
        for conv in self.getConverters():
            that = getattr(self, conv.name, None)
            if that is not None:
                _visit(that, func)

    elif isinstance(self, ot.ValueRecord):
        for that in self.__dict__.values():
            _visit(that, func)


def _Device_recordVarIdx(self, s):
    """Add VarIdx in this Device table (if any) to the set s."""
    if self.DeltaFormat == 0x8000:
        s.add((self.StartSize << 16) + self.EndSize)


def Object_collect_device_varidxes(self, varidxes):
    adder = partial(_Device_recordVarIdx, s=varidxes)
    _visit(self, adder)


ot.GDEF.collect_device_varidxes = Object_collect_device_varidxes
ot.GPOS.collect_device_varidxes = Object_collect_device_varidxes


def _Device_mapVarIdx(self, mapping, done):
    """Map VarIdx in this Device table (if any) through mapping."""
    if id(self) in done:
        return
    done.add(id(self))
    if self.DeltaFormat == 0x8000:
        varIdx = mapping[(self.StartSize << 16) + self.EndSize]
        self.StartSize = varIdx >> 16
        self.EndSize = varIdx & 0xFFFF


def Object_remap_device_varidxes(self, varidxes_map):
    mapper = partial(_Device_mapVarIdx, mapping=varidxes_map, done=set())
    _visit(self, mapper)


ot.GDEF.remap_device_varidxes = Object_remap_device_varidxes
ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes


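# Sketch of the packing used above: a Device table with DeltaFormat 0x8000 is
# really a VariationIndex table, holding the outer index in StartSize and the
# inner index in EndSize, so a 32-bit VarIdx round-trips as
#
#   varIdx = (dev.StartSize << 16) + dev.EndSize
#   dev.StartSize, dev.EndSize = varIdx >> 16, varIdx & 0xFFFF

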
class _Encoding(object):
    def __init__(self, chars):
        self.chars = chars
        self.width = bit_count(chars)
        self.columns = self._columns(chars)
        self.overhead = self._characteristic_overhead(self.columns)
        self.items = set()

    def append(self, row):
        self.items.add(row)

    def extend(self, lst):
        self.items.update(lst)

    def get_room(self):
        """Maximum number of bytes that can be added to the characteristic
        while merging it into another encoding remains beneficial."""
        count = len(self.items)
        return max(0, (self.overhead - 1) // count - self.width)

    room = property(get_room)

    def get_gain(self):
        """Maximum possible byte gain from merging this into another
        characteristic."""
        count = len(self.items)
        return max(0, self.overhead - count)

    gain = property(get_gain)

    def gain_sort_key(self):
        return self.gain, self.chars

    def width_sort_key(self):
        return self.width, self.chars

    @staticmethod
    def _characteristic_overhead(columns):
        """Returns overhead in bytes of encoding this characteristic
        as a VarData."""
        c = 4 + 6  # 4 bytes for LOffset, 6 bytes for VarData header
        c += bit_count(columns) * 2
        return c

    @staticmethod
    def _columns(chars):
        cols = 0
        i = 1
        while chars:
            if chars & 0b1111:
                cols |= i
            chars >>= 4
            i <<= 1
        return cols

    def gain_from_merging(self, other_encoding):
        combined_chars = other_encoding.chars | self.chars
        combined_width = bit_count(combined_chars)
        combined_columns = self.columns | other_encoding.columns
        combined_overhead = _Encoding._characteristic_overhead(combined_columns)
        combined_gain = (
            +self.overhead
            + other_encoding.overhead
            - combined_overhead
            - (combined_width - self.width) * len(self.items)
            - (combined_width - other_encoding.width) * len(other_encoding.items)
        )
        return combined_gain


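# Worked example (traced by hand, not executed): two encodings over the same
# single column, one byte-wide (chars=0b0001, 3 rows) and one word-wide
# (chars=0b0011, 5 rows), each carrying overhead 10 + 2 * 1 = 12 bytes.
# Merging yields chars=0b0011 (width 2), so each byte-wide row grows by one
# byte:
#
#   gain = 12 + 12 - 12 - (2 - 1) * 3 - (2 - 2) * 5  # = 9 bytes saved

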
class _EncodingDict(dict):
    def __missing__(self, chars):
        r = self[chars] = _Encoding(chars)
        return r

    def add_row(self, row):
        chars = self._row_characteristics(row)
        self[chars].append(row)

    @staticmethod
    def _row_characteristics(row):
        """Returns encoding characteristics for a row."""
        longWords = False

        chars = 0
        i = 1
        for v in row:
            if v:
                chars += i
            if not (-128 <= v <= 127):
                chars += i * 0b0010
            if not (-32768 <= v <= 32767):
                longWords = True
                break
            i <<= 4

        if longWords:
            # Redo; only allow 2-byte/4-byte encoding
            chars = 0
            i = 1
            for v in row:
                if v:
                    chars += i * 0b0011
                if not (-32768 <= v <= 32767):
                    chars += i * 0b1100
                i <<= 4

        return chars


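# Worked example (traced by hand, not executed): for row = (1, 200, 0),
# column 0 fits in one byte (nibble 0b0001), column 1 needs two bytes
# (nibble 0b0011), and column 2 is all zeros (nibble 0b0000):
#
#   _EncodingDict._row_characteristics((1, 200, 0))  # -> 0b0011_0001

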
def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
    """Optimize storage. Returns mapping from old VarIdxes to new ones."""

    # Overview:
    #
    # For each VarData row, we first extend it with zeroes to have
    # one column per region in VarRegionList. We then group the
    # rows into _Encoding objects, by their "characteristic" bitmap.
    # The characteristic bitmap is a binary number representing how
    # many bytes each column of the data takes up to encode. Each
    # column is encoded in four bits. For example, if a column has
    # only values in the range -128..127, it would only have a single
    # bit set in the characteristic bitmap for that column. If it has
    # values in the range -32768..32767, it would have two bits set.
    # The number of ones in the characteristic bitmap is the "width"
    # of the encoding.
    #
    # Each encoding as such has a number of "active" (ie. non-zero)
    # columns. The overhead of encoding the characteristic bitmap
    # is 10 bytes, plus 2 bytes per active column.
    #
    # When an encoding is merged into another one, if the characteristic
    # of the old encoding is a subset of the new one, then the overhead
    # of the old encoding is completely eliminated. However, each row
    # now would require more bytes to encode, to the tune of one byte
    # per characteristic bit that is active in the new encoding but not
    # in the old one. The number of bits that can be added to an encoding
    # while still beneficial to merge it into another encoding is called
    # the "room" for that encoding.
    #
    # The "gain" of an encoding is the maximum number of bytes we can
    # save by merging it into another encoding. The "gain" of merging
    # two encodings is how many bytes we save by doing so.
    #
    # High-level algorithm:
    #
    # - Each encoding has a minimal way to encode it. However, because
    #   of the overhead of encoding the characteristic bitmap, it may
    #   be beneficial to merge two encodings together, if there is
    #   gain in doing so. As such, we need to search for the best
    #   such successive merges.
    #
    # Algorithm:
    #
    # - Put all encodings into a "todo" list.
    #
    # - Sort todo list by decreasing gain (for stability).
    #
    # - Make a priority-queue of the gain from combining each two
    #   encodings in the todo list. The priority queue is sorted by
    #   decreasing gain. Only positive gains are included.
    #
    # - While priority queue is not empty:
    #   - Pop the first item from the priority queue,
    #   - Merge the two encodings it represents,
    #   - Remove the two encodings from the todo list,
    #   - Insert positive gains from combining the new encoding with
    #     all existing todo list items into the priority queue,
    #   - If a todo list item with the same characteristic bitmap as
    #     the new encoding exists, remove it from the todo list and
    #     merge it into the new encoding.
    #   - Insert the new encoding into the todo list,
    #
    # - Encode all remaining items in the todo list.
    #
    # The output is then sorted for stability, in the following way:
    # - The VarRegionList of the input is kept intact.
    # - All encodings are sorted before the main algorithm, by
    #   gain_sort_key(), which is a tuple of the following items:
    #   * The gain of the encoding.
    #   * The characteristic bitmap of the encoding, with higher-numbered
    #     columns compared first.
    # - The VarData is sorted by width_sort_key(), which is a tuple
    #   of the following items:
    #   * The "width" of the encoding.
    #   * The characteristic bitmap of the encoding, with higher-numbered
    #     columns compared first.
    # - Within each VarData, the items are sorted as vectors of numbers.
    #
    # Finally, each VarData is optimized to remove the empty columns and
    # reorder columns as needed.

    # TODO
    # Check that no two VarRegions are the same; if they are, fold them.

    n = len(self.VarRegionList.Region)  # Number of columns
    zeroes = [0] * n

    front_mapping = {}  # Map from old VarIdxes to full row tuples

    encodings = _EncodingDict()

    # Collect all items into a set of full rows (with lots of zeroes.)
    for major, data in enumerate(self.VarData):
        regionIndices = data.VarRegionIndex

        for minor, item in enumerate(data.Item):
            row = list(zeroes)

            if quantization == 1:
                for regionIdx, v in zip(regionIndices, item):
                    row[regionIdx] += v
            else:
                for regionIdx, v in zip(regionIndices, item):
                    row[regionIdx] += (
                        round(v / quantization) * quantization
                    )  # TODO https://github.com/fonttools/fonttools/pull/3126#discussion_r1205439785
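            # E.g., with quantization=4, a delta of 7 is stored as
            # round(7 / 4) * 4 == 8, trading precision for rows that
            # are more likely to be shareable.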

            row = tuple(row)

            if use_NO_VARIATION_INDEX and not any(row):
                front_mapping[(major << 16) + minor] = None
                continue

            encodings.add_row(row)
            front_mapping[(major << 16) + minor] = row

    # Prepare for the main algorithm.
    todo = sorted(encodings.values(), key=_Encoding.gain_sort_key)
    del encodings

    # Repeatedly pick two best encodings to combine, and combine them.

    heap = []
    for i, encoding in enumerate(todo):
        for j in range(i + 1, len(todo)):
            other_encoding = todo[j]
            combining_gain = encoding.gain_from_merging(other_encoding)
            if combining_gain > 0:
                heappush(heap, (-combining_gain, i, j))

    while heap:
        _, i, j = heappop(heap)
        if todo[i] is None or todo[j] is None:
            continue

        encoding, other_encoding = todo[i], todo[j]
        todo[i], todo[j] = None, None

        # Combine the two encodings
        combined_chars = other_encoding.chars | encoding.chars
        combined_encoding = _Encoding(combined_chars)
        combined_encoding.extend(encoding.items)
        combined_encoding.extend(other_encoding.items)

        for k, enc in enumerate(todo):
            if enc is None:
                continue

            # In the unlikely event that the same encoding exists already,
            # combine it.
            if enc.chars == combined_chars:
                combined_encoding.extend(enc.items)
                todo[k] = None
                continue

            combining_gain = combined_encoding.gain_from_merging(enc)
            if combining_gain > 0:
                heappush(heap, (-combining_gain, k, len(todo)))

        todo.append(combined_encoding)

    encodings = [encoding for encoding in todo if encoding is not None]

    # Assemble final store.
    back_mapping = {}  # Mapping from full rows to new VarIdxes
    encodings.sort(key=_Encoding.width_sort_key)
    self.VarData = []
    for encoding in encodings:
        items = sorted(encoding.items)

        while items:
            major = len(self.VarData)
            data = ot.VarData()
            self.VarData.append(data)
            data.VarRegionIndex = range(n)
            data.VarRegionCount = len(data.VarRegionIndex)

            # Each major can only encode up to 0xFFFF entries.
            data.Item, items = items[:0xFFFF], items[0xFFFF:]

            for minor, item in enumerate(data.Item):
                back_mapping[item] = (major << 16) + minor

    # Compile final mapping.
    varidx_map = {NO_VARIATION_INDEX: NO_VARIATION_INDEX}
    for k, v in front_mapping.items():
        varidx_map[k] = back_mapping[v] if v is not None else NO_VARIATION_INDEX

    # Recalculate things and go home.
    self.VarRegionList.RegionCount = len(self.VarRegionList.Region)
    self.VarDataCount = len(self.VarData)
    for data in self.VarData:
        data.ItemCount = len(data.Item)
        data.optimize()

    # Remove unused regions.
    self.prune_regions()

    return varidx_map


ot.VarStore.optimize = VarStore_optimize


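# Illustrative usage sketch (not executed), mirroring main() below:
#
#   varidx_map = font["GDEF"].table.VarStore.optimize(quantization=2)
#   font["GDEF"].table.remap_device_varidxes(varidx_map)

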
def main(args=None):
    """Optimize a font's GDEF variation store"""
    from argparse import ArgumentParser
    from fontTools import configLogger
    from fontTools.ttLib import TTFont
    from fontTools.ttLib.tables.otBase import OTTableWriter

    parser = ArgumentParser(prog="varLib.varStore", description=main.__doc__)
    parser.add_argument("--quantization", type=int, default=1)
    parser.add_argument("fontfile")
    parser.add_argument("outfile", nargs="?")
    options = parser.parse_args(args)

    # TODO: allow user to configure logging via command-line options
    configLogger(level="INFO")

    quantization = options.quantization
    fontfile = options.fontfile
    outfile = options.outfile

    font = TTFont(fontfile)
    gdef = font["GDEF"]
    store = gdef.table.VarStore

    writer = OTTableWriter()
    store.compile(writer, font)
    size = len(writer.getAllData())
    print("Before: %7d bytes" % size)

    varidx_map = store.optimize(quantization=quantization)

    writer = OTTableWriter()
    store.compile(writer, font)
    size = len(writer.getAllData())
    print("After:  %7d bytes" % size)

    if outfile is not None:
        gdef.table.remap_device_varidxes(varidx_map)
        if "GPOS" in font:
            font["GPOS"].table.remap_device_varidxes(varidx_map)

        font.save(outfile)


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        sys.exit(main())
    import doctest

    sys.exit(doctest.testmod().failed)