xref: /aosp_15_r20/external/toolchain-utils/bestflags/genetic_algorithm.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li# Copyright 2013 The ChromiumOS Authors
2*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
3*760c253cSXin Li# found in the LICENSE file.
4*760c253cSXin Li"""The hill genetic algorithm.
5*760c253cSXin Li
6*760c253cSXin LiPart of the Chrome build flags optimization.
7*760c253cSXin Li"""
8*760c253cSXin Li
9*760c253cSXin Li__author__ = "[email protected] (Yuheng Long)"
10*760c253cSXin Li
11*760c253cSXin Liimport random
12*760c253cSXin Li
13*760c253cSXin Liimport flags
14*760c253cSXin Lifrom flags import Flag
15*760c253cSXin Lifrom flags import FlagSet
16*760c253cSXin Lifrom generation import Generation
17*760c253cSXin Lifrom task import Task
18*760c253cSXin Li
19*760c253cSXin Li
20*760c253cSXin Lidef CrossoverWith(first_flag, second_flag):
21*760c253cSXin Li    """Get a crossed over gene.
22*760c253cSXin Li
23*760c253cSXin Li    At present, this just picks either/or of these values.  However, it could be
24*760c253cSXin Li    implemented as an integer maskover effort, if required.
25*760c253cSXin Li
26*760c253cSXin Li    Args:
27*760c253cSXin Li      first_flag: The first gene (Flag) to cross over with.
28*760c253cSXin Li      second_flag: The second gene (Flag) to cross over with.
29*760c253cSXin Li
30*760c253cSXin Li    Returns:
31*760c253cSXin Li      A Flag that can be considered appropriately randomly blended between the
32*760c253cSXin Li      first and second input flag.
33*760c253cSXin Li    """
34*760c253cSXin Li
35*760c253cSXin Li    return first_flag if random.randint(0, 1) else second_flag
36*760c253cSXin Li
37*760c253cSXin Li
38*760c253cSXin Lidef RandomMutate(specs, flag_set, mutation_rate):
39*760c253cSXin Li    """Randomly mutate the content of a task.
40*760c253cSXin Li
41*760c253cSXin Li    Args:
42*760c253cSXin Li      specs: A list of spec from which the flag set is created.
43*760c253cSXin Li      flag_set: The current flag set being mutated
44*760c253cSXin Li      mutation_rate: What fraction of genes to mutate.
45*760c253cSXin Li
46*760c253cSXin Li    Returns:
47*760c253cSXin Li      A Genetic Task constructed by randomly mutating the input flag set.
48*760c253cSXin Li    """
49*760c253cSXin Li
50*760c253cSXin Li    results_flags = []
51*760c253cSXin Li
52*760c253cSXin Li    for spec in specs:
53*760c253cSXin Li        # Randomly choose whether this flag should be mutated.
54*760c253cSXin Li        if random.randint(0, int(1 / mutation_rate)):
55*760c253cSXin Li            continue
56*760c253cSXin Li
57*760c253cSXin Li        # If the flag is not already in the flag set, it is added.
58*760c253cSXin Li        if spec not in flag_set:
59*760c253cSXin Li            results_flags.append(Flag(spec))
60*760c253cSXin Li            continue
61*760c253cSXin Li
62*760c253cSXin Li        # If the flag is already in the flag set, it is mutated.
63*760c253cSXin Li        numeric_flag_match = flags.Search(spec)
64*760c253cSXin Li
65*760c253cSXin Li        # The value of a numeric flag will be changed, and a boolean flag will be
66*760c253cSXin Li        # dropped.
67*760c253cSXin Li        if not numeric_flag_match:
68*760c253cSXin Li            continue
69*760c253cSXin Li
70*760c253cSXin Li        value = flag_set[spec].GetValue()
71*760c253cSXin Li
72*760c253cSXin Li        # Randomly select a nearby value of the current value of the flag.
73*760c253cSXin Li        rand_arr = [value]
74*760c253cSXin Li        if value + 1 < int(numeric_flag_match.group("end")):
75*760c253cSXin Li            rand_arr.append(value + 1)
76*760c253cSXin Li
77*760c253cSXin Li        rand_arr.append(value - 1)
78*760c253cSXin Li        value = random.sample(rand_arr, 1)[0]
79*760c253cSXin Li
80*760c253cSXin Li        # If the value is smaller than the start of the spec, this flag will be
81*760c253cSXin Li        # dropped.
82*760c253cSXin Li        if value != int(numeric_flag_match.group("start")) - 1:
83*760c253cSXin Li            results_flags.append(Flag(spec, value))
84*760c253cSXin Li
85*760c253cSXin Li    return GATask(FlagSet(results_flags))
86*760c253cSXin Li
87*760c253cSXin Li
88*760c253cSXin Liclass GATask(Task):
89*760c253cSXin Li    def __init__(self, flag_set):
90*760c253cSXin Li        Task.__init__(self, flag_set)
91*760c253cSXin Li
92*760c253cSXin Li    def ReproduceWith(self, other, specs, mutation_rate):
93*760c253cSXin Li        """Reproduce with other FlagSet.
94*760c253cSXin Li
95*760c253cSXin Li        Args:
96*760c253cSXin Li          other: A FlagSet to reproduce with.
97*760c253cSXin Li          specs: A list of spec from which the flag set is created.
98*760c253cSXin Li          mutation_rate: one in mutation_rate flags will be mutated (replaced by a
99*760c253cSXin Li            random version of the same flag, instead of one from either of the
100*760c253cSXin Li            parents).  Set to 0 to disable mutation.
101*760c253cSXin Li
102*760c253cSXin Li        Returns:
103*760c253cSXin Li          A GA task made by mixing self with other.
104*760c253cSXin Li        """
105*760c253cSXin Li
106*760c253cSXin Li        # Get the flag dictionary.
107*760c253cSXin Li        father_flags = self.GetFlags().GetFlags()
108*760c253cSXin Li        mother_flags = other.GetFlags().GetFlags()
109*760c253cSXin Li
110*760c253cSXin Li        # Flags that are common in both parents and flags that belong to only one
111*760c253cSXin Li        # parent.
112*760c253cSXin Li        self_flags = []
113*760c253cSXin Li        other_flags = []
114*760c253cSXin Li        common_flags = []
115*760c253cSXin Li
116*760c253cSXin Li        # Find out flags that are common to both parent and flags that belong soly
117*760c253cSXin Li        # to one parent.
118*760c253cSXin Li        for self_flag in father_flags:
119*760c253cSXin Li            if self_flag in mother_flags:
120*760c253cSXin Li                common_flags.append(self_flag)
121*760c253cSXin Li            else:
122*760c253cSXin Li                self_flags.append(self_flag)
123*760c253cSXin Li
124*760c253cSXin Li        for other_flag in mother_flags:
125*760c253cSXin Li            if other_flag not in father_flags:
126*760c253cSXin Li                other_flags.append(other_flag)
127*760c253cSXin Li
128*760c253cSXin Li        # Randomly select flags that belong to only one parent.
129*760c253cSXin Li        output_flags = [
130*760c253cSXin Li            father_flags[f] for f in self_flags if random.randint(0, 1)
131*760c253cSXin Li        ]
132*760c253cSXin Li        others = [mother_flags[f] for f in other_flags if random.randint(0, 1)]
133*760c253cSXin Li        output_flags.extend(others)
134*760c253cSXin Li        # Turn on flags that belong to both parent. Randomly choose the value of the
135*760c253cSXin Li        # flag from either parent.
136*760c253cSXin Li        for flag in common_flags:
137*760c253cSXin Li            output_flags.append(
138*760c253cSXin Li                CrossoverWith(father_flags[flag], mother_flags[flag])
139*760c253cSXin Li            )
140*760c253cSXin Li
141*760c253cSXin Li        # Mutate flags
142*760c253cSXin Li        if mutation_rate:
143*760c253cSXin Li            return RandomMutate(specs, FlagSet(output_flags), mutation_rate)
144*760c253cSXin Li
145*760c253cSXin Li        return GATask(FlagSet(output_flags))
146*760c253cSXin Li
147*760c253cSXin Li
148*760c253cSXin Liclass GAGeneration(Generation):
149*760c253cSXin Li    """The Genetic Algorithm."""
150*760c253cSXin Li
151*760c253cSXin Li    # The value checks whether the algorithm has converged and arrives at a fixed
152*760c253cSXin Li    # point. If STOP_THRESHOLD of generations have not seen any performance
153*760c253cSXin Li    # improvement, the Genetic Algorithm stops.
154*760c253cSXin Li    STOP_THRESHOLD = None
155*760c253cSXin Li
156*760c253cSXin Li    # Number of tasks in each generation.
157*760c253cSXin Li    NUM_CHROMOSOMES = None
158*760c253cSXin Li
159*760c253cSXin Li    # The value checks whether the algorithm has converged and arrives at a fixed
160*760c253cSXin Li    # point. If NUM_TRIALS of trials have been attempted to generate a new task
161*760c253cSXin Li    # without a success, the Genetic Algorithm stops.
162*760c253cSXin Li    NUM_TRIALS = None
163*760c253cSXin Li
164*760c253cSXin Li    # The flags that can be used to generate new tasks.
165*760c253cSXin Li    SPECS = None
166*760c253cSXin Li
167*760c253cSXin Li    # What fraction of genes to mutate.
168*760c253cSXin Li    MUTATION_RATE = 0
169*760c253cSXin Li
170*760c253cSXin Li    @staticmethod
171*760c253cSXin Li    def InitMetaData(
172*760c253cSXin Li        stop_threshold, num_chromosomes, num_trials, specs, mutation_rate
173*760c253cSXin Li    ):
174*760c253cSXin Li        """Set up the meta data for the Genetic Algorithm.
175*760c253cSXin Li
176*760c253cSXin Li        Args:
177*760c253cSXin Li          stop_threshold: The number of generations, upon which no performance has
178*760c253cSXin Li            seen, the Genetic Algorithm stops.
179*760c253cSXin Li          num_chromosomes: Number of tasks in each generation.
180*760c253cSXin Li          num_trials: The number of trials, upon which new task has been tried to
181*760c253cSXin Li            generated without success, the Genetic Algorithm stops.
182*760c253cSXin Li          specs: The flags that can be used to generate new tasks.
183*760c253cSXin Li          mutation_rate: What fraction of genes to mutate.
184*760c253cSXin Li        """
185*760c253cSXin Li
186*760c253cSXin Li        GAGeneration.STOP_THRESHOLD = stop_threshold
187*760c253cSXin Li        GAGeneration.NUM_CHROMOSOMES = num_chromosomes
188*760c253cSXin Li        GAGeneration.NUM_TRIALS = num_trials
189*760c253cSXin Li        GAGeneration.SPECS = specs
190*760c253cSXin Li        GAGeneration.MUTATION_RATE = mutation_rate
191*760c253cSXin Li
192*760c253cSXin Li    def __init__(self, tasks, parents, total_stucks):
193*760c253cSXin Li        """Set up the meta data for the Genetic Algorithm.
194*760c253cSXin Li
195*760c253cSXin Li        Args:
196*760c253cSXin Li          tasks: A set of tasks to be run.
197*760c253cSXin Li          parents: A set of tasks from which this new generation is produced. This
198*760c253cSXin Li            set also contains the best tasks generated so far.
199*760c253cSXin Li          total_stucks: The number of generations that have not seen improvement.
200*760c253cSXin Li            The Genetic Algorithm will stop once the total_stucks equals to
201*760c253cSXin Li            NUM_TRIALS defined in the GAGeneration class.
202*760c253cSXin Li        """
203*760c253cSXin Li
204*760c253cSXin Li        Generation.__init__(self, tasks, parents)
205*760c253cSXin Li        self._total_stucks = total_stucks
206*760c253cSXin Li
207*760c253cSXin Li    def IsImproved(self):
208*760c253cSXin Li        """True if this generation has improvement upon its parent generation."""
209*760c253cSXin Li
210*760c253cSXin Li        tasks = self.Pool()
211*760c253cSXin Li        parents = self.CandidatePool()
212*760c253cSXin Li
213*760c253cSXin Li        # The first generate does not have parents.
214*760c253cSXin Li        if not parents:
215*760c253cSXin Li            return True
216*760c253cSXin Li
217*760c253cSXin Li        # Found out whether a task has improvement upon the best task in the
218*760c253cSXin Li        # parent generation.
219*760c253cSXin Li        best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0]
220*760c253cSXin Li        best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0]
221*760c253cSXin Li
222*760c253cSXin Li        # At least one task has improvement.
223*760c253cSXin Li        if best_current.IsImproved(best_parent):
224*760c253cSXin Li            self._total_stucks = 0
225*760c253cSXin Li            return True
226*760c253cSXin Li
227*760c253cSXin Li        # If STOP_THRESHOLD of generations have no improvement, the algorithm stops.
228*760c253cSXin Li        if self._total_stucks >= GAGeneration.STOP_THRESHOLD:
229*760c253cSXin Li            return False
230*760c253cSXin Li
231*760c253cSXin Li        self._total_stucks += 1
232*760c253cSXin Li        return True
233*760c253cSXin Li
234*760c253cSXin Li    def Next(self, cache):
235*760c253cSXin Li        """Calculate the next generation.
236*760c253cSXin Li
237*760c253cSXin Li        Generate a new generation from the a set of tasks. This set contains the
238*760c253cSXin Li          best set seen so far and the tasks executed in the parent generation.
239*760c253cSXin Li
240*760c253cSXin Li        Args:
241*760c253cSXin Li          cache: A set of tasks that have been generated before.
242*760c253cSXin Li
243*760c253cSXin Li        Returns:
244*760c253cSXin Li          A set of new generations.
245*760c253cSXin Li        """
246*760c253cSXin Li
247*760c253cSXin Li        target_len = GAGeneration.NUM_CHROMOSOMES
248*760c253cSXin Li        specs = GAGeneration.SPECS
249*760c253cSXin Li        mutation_rate = GAGeneration.MUTATION_RATE
250*760c253cSXin Li
251*760c253cSXin Li        # Collect a set of size target_len of tasks. This set will be used to
252*760c253cSXin Li        # produce a new generation of tasks.
253*760c253cSXin Li        gen_tasks = [task for task in self.Pool()]
254*760c253cSXin Li
255*760c253cSXin Li        parents = self.CandidatePool()
256*760c253cSXin Li        if parents:
257*760c253cSXin Li            gen_tasks.extend(parents)
258*760c253cSXin Li
259*760c253cSXin Li        # A set of tasks that are the best. This set will be used as the parent
260*760c253cSXin Li        # generation to produce the next generation.
261*760c253cSXin Li        sort_func = lambda task: task.GetTestResult()
262*760c253cSXin Li        retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len]
263*760c253cSXin Li
264*760c253cSXin Li        child_pool = set()
265*760c253cSXin Li        for father in retained_tasks:
266*760c253cSXin Li            num_trials = 0
267*760c253cSXin Li            # Try num_trials times to produce a new child.
268*760c253cSXin Li            while num_trials < GAGeneration.NUM_TRIALS:
269*760c253cSXin Li                # Randomly select another parent.
270*760c253cSXin Li                mother = random.choice(retained_tasks)
271*760c253cSXin Li                # Cross over.
272*760c253cSXin Li                child = mother.ReproduceWith(father, specs, mutation_rate)
273*760c253cSXin Li                if child not in child_pool and child not in cache:
274*760c253cSXin Li                    child_pool.add(child)
275*760c253cSXin Li                    break
276*760c253cSXin Li                else:
277*760c253cSXin Li                    num_trials += 1
278*760c253cSXin Li
279*760c253cSXin Li        num_trials = 0
280*760c253cSXin Li
281*760c253cSXin Li        while (
282*760c253cSXin Li            len(child_pool) < target_len
283*760c253cSXin Li            and num_trials < GAGeneration.NUM_TRIALS
284*760c253cSXin Li        ):
285*760c253cSXin Li            for keep_task in retained_tasks:
286*760c253cSXin Li                # Mutation.
287*760c253cSXin Li                child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate)
288*760c253cSXin Li                if child not in child_pool and child not in cache:
289*760c253cSXin Li                    child_pool.add(child)
290*760c253cSXin Li                    if len(child_pool) >= target_len:
291*760c253cSXin Li                        break
292*760c253cSXin Li                else:
293*760c253cSXin Li                    num_trials += 1
294*760c253cSXin Li
295*760c253cSXin Li        # If NUM_TRIALS of tries have been attempted without generating a set of new
296*760c253cSXin Li        # tasks, the algorithm stops.
297*760c253cSXin Li        if num_trials >= GAGeneration.NUM_TRIALS:
298*760c253cSXin Li            return []
299*760c253cSXin Li
300*760c253cSXin Li        assert len(child_pool) == target_len
301*760c253cSXin Li
302*760c253cSXin Li        return [
303*760c253cSXin Li            GAGeneration(child_pool, set(retained_tasks), self._total_stucks)
304*760c253cSXin Li        ]
305