1*760c253cSXin Li# Copyright 2013 The ChromiumOS Authors 2*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be 3*760c253cSXin Li# found in the LICENSE file. 4*760c253cSXin Li"""The hill genetic algorithm. 5*760c253cSXin Li 6*760c253cSXin LiPart of the Chrome build flags optimization. 7*760c253cSXin Li""" 8*760c253cSXin Li 9*760c253cSXin Li__author__ = "[email protected] (Yuheng Long)" 10*760c253cSXin Li 11*760c253cSXin Liimport random 12*760c253cSXin Li 13*760c253cSXin Liimport flags 14*760c253cSXin Lifrom flags import Flag 15*760c253cSXin Lifrom flags import FlagSet 16*760c253cSXin Lifrom generation import Generation 17*760c253cSXin Lifrom task import Task 18*760c253cSXin Li 19*760c253cSXin Li 20*760c253cSXin Lidef CrossoverWith(first_flag, second_flag): 21*760c253cSXin Li """Get a crossed over gene. 22*760c253cSXin Li 23*760c253cSXin Li At present, this just picks either/or of these values. However, it could be 24*760c253cSXin Li implemented as an integer maskover effort, if required. 25*760c253cSXin Li 26*760c253cSXin Li Args: 27*760c253cSXin Li first_flag: The first gene (Flag) to cross over with. 28*760c253cSXin Li second_flag: The second gene (Flag) to cross over with. 29*760c253cSXin Li 30*760c253cSXin Li Returns: 31*760c253cSXin Li A Flag that can be considered appropriately randomly blended between the 32*760c253cSXin Li first and second input flag. 33*760c253cSXin Li """ 34*760c253cSXin Li 35*760c253cSXin Li return first_flag if random.randint(0, 1) else second_flag 36*760c253cSXin Li 37*760c253cSXin Li 38*760c253cSXin Lidef RandomMutate(specs, flag_set, mutation_rate): 39*760c253cSXin Li """Randomly mutate the content of a task. 40*760c253cSXin Li 41*760c253cSXin Li Args: 42*760c253cSXin Li specs: A list of spec from which the flag set is created. 43*760c253cSXin Li flag_set: The current flag set being mutated 44*760c253cSXin Li mutation_rate: What fraction of genes to mutate. 45*760c253cSXin Li 46*760c253cSXin Li Returns: 47*760c253cSXin Li A Genetic Task constructed by randomly mutating the input flag set. 48*760c253cSXin Li """ 49*760c253cSXin Li 50*760c253cSXin Li results_flags = [] 51*760c253cSXin Li 52*760c253cSXin Li for spec in specs: 53*760c253cSXin Li # Randomly choose whether this flag should be mutated. 54*760c253cSXin Li if random.randint(0, int(1 / mutation_rate)): 55*760c253cSXin Li continue 56*760c253cSXin Li 57*760c253cSXin Li # If the flag is not already in the flag set, it is added. 58*760c253cSXin Li if spec not in flag_set: 59*760c253cSXin Li results_flags.append(Flag(spec)) 60*760c253cSXin Li continue 61*760c253cSXin Li 62*760c253cSXin Li # If the flag is already in the flag set, it is mutated. 63*760c253cSXin Li numeric_flag_match = flags.Search(spec) 64*760c253cSXin Li 65*760c253cSXin Li # The value of a numeric flag will be changed, and a boolean flag will be 66*760c253cSXin Li # dropped. 67*760c253cSXin Li if not numeric_flag_match: 68*760c253cSXin Li continue 69*760c253cSXin Li 70*760c253cSXin Li value = flag_set[spec].GetValue() 71*760c253cSXin Li 72*760c253cSXin Li # Randomly select a nearby value of the current value of the flag. 73*760c253cSXin Li rand_arr = [value] 74*760c253cSXin Li if value + 1 < int(numeric_flag_match.group("end")): 75*760c253cSXin Li rand_arr.append(value + 1) 76*760c253cSXin Li 77*760c253cSXin Li rand_arr.append(value - 1) 78*760c253cSXin Li value = random.sample(rand_arr, 1)[0] 79*760c253cSXin Li 80*760c253cSXin Li # If the value is smaller than the start of the spec, this flag will be 81*760c253cSXin Li # dropped. 82*760c253cSXin Li if value != int(numeric_flag_match.group("start")) - 1: 83*760c253cSXin Li results_flags.append(Flag(spec, value)) 84*760c253cSXin Li 85*760c253cSXin Li return GATask(FlagSet(results_flags)) 86*760c253cSXin Li 87*760c253cSXin Li 88*760c253cSXin Liclass GATask(Task): 89*760c253cSXin Li def __init__(self, flag_set): 90*760c253cSXin Li Task.__init__(self, flag_set) 91*760c253cSXin Li 92*760c253cSXin Li def ReproduceWith(self, other, specs, mutation_rate): 93*760c253cSXin Li """Reproduce with other FlagSet. 94*760c253cSXin Li 95*760c253cSXin Li Args: 96*760c253cSXin Li other: A FlagSet to reproduce with. 97*760c253cSXin Li specs: A list of spec from which the flag set is created. 98*760c253cSXin Li mutation_rate: one in mutation_rate flags will be mutated (replaced by a 99*760c253cSXin Li random version of the same flag, instead of one from either of the 100*760c253cSXin Li parents). Set to 0 to disable mutation. 101*760c253cSXin Li 102*760c253cSXin Li Returns: 103*760c253cSXin Li A GA task made by mixing self with other. 104*760c253cSXin Li """ 105*760c253cSXin Li 106*760c253cSXin Li # Get the flag dictionary. 107*760c253cSXin Li father_flags = self.GetFlags().GetFlags() 108*760c253cSXin Li mother_flags = other.GetFlags().GetFlags() 109*760c253cSXin Li 110*760c253cSXin Li # Flags that are common in both parents and flags that belong to only one 111*760c253cSXin Li # parent. 112*760c253cSXin Li self_flags = [] 113*760c253cSXin Li other_flags = [] 114*760c253cSXin Li common_flags = [] 115*760c253cSXin Li 116*760c253cSXin Li # Find out flags that are common to both parent and flags that belong soly 117*760c253cSXin Li # to one parent. 118*760c253cSXin Li for self_flag in father_flags: 119*760c253cSXin Li if self_flag in mother_flags: 120*760c253cSXin Li common_flags.append(self_flag) 121*760c253cSXin Li else: 122*760c253cSXin Li self_flags.append(self_flag) 123*760c253cSXin Li 124*760c253cSXin Li for other_flag in mother_flags: 125*760c253cSXin Li if other_flag not in father_flags: 126*760c253cSXin Li other_flags.append(other_flag) 127*760c253cSXin Li 128*760c253cSXin Li # Randomly select flags that belong to only one parent. 129*760c253cSXin Li output_flags = [ 130*760c253cSXin Li father_flags[f] for f in self_flags if random.randint(0, 1) 131*760c253cSXin Li ] 132*760c253cSXin Li others = [mother_flags[f] for f in other_flags if random.randint(0, 1)] 133*760c253cSXin Li output_flags.extend(others) 134*760c253cSXin Li # Turn on flags that belong to both parent. Randomly choose the value of the 135*760c253cSXin Li # flag from either parent. 136*760c253cSXin Li for flag in common_flags: 137*760c253cSXin Li output_flags.append( 138*760c253cSXin Li CrossoverWith(father_flags[flag], mother_flags[flag]) 139*760c253cSXin Li ) 140*760c253cSXin Li 141*760c253cSXin Li # Mutate flags 142*760c253cSXin Li if mutation_rate: 143*760c253cSXin Li return RandomMutate(specs, FlagSet(output_flags), mutation_rate) 144*760c253cSXin Li 145*760c253cSXin Li return GATask(FlagSet(output_flags)) 146*760c253cSXin Li 147*760c253cSXin Li 148*760c253cSXin Liclass GAGeneration(Generation): 149*760c253cSXin Li """The Genetic Algorithm.""" 150*760c253cSXin Li 151*760c253cSXin Li # The value checks whether the algorithm has converged and arrives at a fixed 152*760c253cSXin Li # point. If STOP_THRESHOLD of generations have not seen any performance 153*760c253cSXin Li # improvement, the Genetic Algorithm stops. 154*760c253cSXin Li STOP_THRESHOLD = None 155*760c253cSXin Li 156*760c253cSXin Li # Number of tasks in each generation. 157*760c253cSXin Li NUM_CHROMOSOMES = None 158*760c253cSXin Li 159*760c253cSXin Li # The value checks whether the algorithm has converged and arrives at a fixed 160*760c253cSXin Li # point. If NUM_TRIALS of trials have been attempted to generate a new task 161*760c253cSXin Li # without a success, the Genetic Algorithm stops. 162*760c253cSXin Li NUM_TRIALS = None 163*760c253cSXin Li 164*760c253cSXin Li # The flags that can be used to generate new tasks. 165*760c253cSXin Li SPECS = None 166*760c253cSXin Li 167*760c253cSXin Li # What fraction of genes to mutate. 168*760c253cSXin Li MUTATION_RATE = 0 169*760c253cSXin Li 170*760c253cSXin Li @staticmethod 171*760c253cSXin Li def InitMetaData( 172*760c253cSXin Li stop_threshold, num_chromosomes, num_trials, specs, mutation_rate 173*760c253cSXin Li ): 174*760c253cSXin Li """Set up the meta data for the Genetic Algorithm. 175*760c253cSXin Li 176*760c253cSXin Li Args: 177*760c253cSXin Li stop_threshold: The number of generations, upon which no performance has 178*760c253cSXin Li seen, the Genetic Algorithm stops. 179*760c253cSXin Li num_chromosomes: Number of tasks in each generation. 180*760c253cSXin Li num_trials: The number of trials, upon which new task has been tried to 181*760c253cSXin Li generated without success, the Genetic Algorithm stops. 182*760c253cSXin Li specs: The flags that can be used to generate new tasks. 183*760c253cSXin Li mutation_rate: What fraction of genes to mutate. 184*760c253cSXin Li """ 185*760c253cSXin Li 186*760c253cSXin Li GAGeneration.STOP_THRESHOLD = stop_threshold 187*760c253cSXin Li GAGeneration.NUM_CHROMOSOMES = num_chromosomes 188*760c253cSXin Li GAGeneration.NUM_TRIALS = num_trials 189*760c253cSXin Li GAGeneration.SPECS = specs 190*760c253cSXin Li GAGeneration.MUTATION_RATE = mutation_rate 191*760c253cSXin Li 192*760c253cSXin Li def __init__(self, tasks, parents, total_stucks): 193*760c253cSXin Li """Set up the meta data for the Genetic Algorithm. 194*760c253cSXin Li 195*760c253cSXin Li Args: 196*760c253cSXin Li tasks: A set of tasks to be run. 197*760c253cSXin Li parents: A set of tasks from which this new generation is produced. This 198*760c253cSXin Li set also contains the best tasks generated so far. 199*760c253cSXin Li total_stucks: The number of generations that have not seen improvement. 200*760c253cSXin Li The Genetic Algorithm will stop once the total_stucks equals to 201*760c253cSXin Li NUM_TRIALS defined in the GAGeneration class. 202*760c253cSXin Li """ 203*760c253cSXin Li 204*760c253cSXin Li Generation.__init__(self, tasks, parents) 205*760c253cSXin Li self._total_stucks = total_stucks 206*760c253cSXin Li 207*760c253cSXin Li def IsImproved(self): 208*760c253cSXin Li """True if this generation has improvement upon its parent generation.""" 209*760c253cSXin Li 210*760c253cSXin Li tasks = self.Pool() 211*760c253cSXin Li parents = self.CandidatePool() 212*760c253cSXin Li 213*760c253cSXin Li # The first generate does not have parents. 214*760c253cSXin Li if not parents: 215*760c253cSXin Li return True 216*760c253cSXin Li 217*760c253cSXin Li # Found out whether a task has improvement upon the best task in the 218*760c253cSXin Li # parent generation. 219*760c253cSXin Li best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0] 220*760c253cSXin Li best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0] 221*760c253cSXin Li 222*760c253cSXin Li # At least one task has improvement. 223*760c253cSXin Li if best_current.IsImproved(best_parent): 224*760c253cSXin Li self._total_stucks = 0 225*760c253cSXin Li return True 226*760c253cSXin Li 227*760c253cSXin Li # If STOP_THRESHOLD of generations have no improvement, the algorithm stops. 228*760c253cSXin Li if self._total_stucks >= GAGeneration.STOP_THRESHOLD: 229*760c253cSXin Li return False 230*760c253cSXin Li 231*760c253cSXin Li self._total_stucks += 1 232*760c253cSXin Li return True 233*760c253cSXin Li 234*760c253cSXin Li def Next(self, cache): 235*760c253cSXin Li """Calculate the next generation. 236*760c253cSXin Li 237*760c253cSXin Li Generate a new generation from the a set of tasks. This set contains the 238*760c253cSXin Li best set seen so far and the tasks executed in the parent generation. 239*760c253cSXin Li 240*760c253cSXin Li Args: 241*760c253cSXin Li cache: A set of tasks that have been generated before. 242*760c253cSXin Li 243*760c253cSXin Li Returns: 244*760c253cSXin Li A set of new generations. 245*760c253cSXin Li """ 246*760c253cSXin Li 247*760c253cSXin Li target_len = GAGeneration.NUM_CHROMOSOMES 248*760c253cSXin Li specs = GAGeneration.SPECS 249*760c253cSXin Li mutation_rate = GAGeneration.MUTATION_RATE 250*760c253cSXin Li 251*760c253cSXin Li # Collect a set of size target_len of tasks. This set will be used to 252*760c253cSXin Li # produce a new generation of tasks. 253*760c253cSXin Li gen_tasks = [task for task in self.Pool()] 254*760c253cSXin Li 255*760c253cSXin Li parents = self.CandidatePool() 256*760c253cSXin Li if parents: 257*760c253cSXin Li gen_tasks.extend(parents) 258*760c253cSXin Li 259*760c253cSXin Li # A set of tasks that are the best. This set will be used as the parent 260*760c253cSXin Li # generation to produce the next generation. 261*760c253cSXin Li sort_func = lambda task: task.GetTestResult() 262*760c253cSXin Li retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len] 263*760c253cSXin Li 264*760c253cSXin Li child_pool = set() 265*760c253cSXin Li for father in retained_tasks: 266*760c253cSXin Li num_trials = 0 267*760c253cSXin Li # Try num_trials times to produce a new child. 268*760c253cSXin Li while num_trials < GAGeneration.NUM_TRIALS: 269*760c253cSXin Li # Randomly select another parent. 270*760c253cSXin Li mother = random.choice(retained_tasks) 271*760c253cSXin Li # Cross over. 272*760c253cSXin Li child = mother.ReproduceWith(father, specs, mutation_rate) 273*760c253cSXin Li if child not in child_pool and child not in cache: 274*760c253cSXin Li child_pool.add(child) 275*760c253cSXin Li break 276*760c253cSXin Li else: 277*760c253cSXin Li num_trials += 1 278*760c253cSXin Li 279*760c253cSXin Li num_trials = 0 280*760c253cSXin Li 281*760c253cSXin Li while ( 282*760c253cSXin Li len(child_pool) < target_len 283*760c253cSXin Li and num_trials < GAGeneration.NUM_TRIALS 284*760c253cSXin Li ): 285*760c253cSXin Li for keep_task in retained_tasks: 286*760c253cSXin Li # Mutation. 287*760c253cSXin Li child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate) 288*760c253cSXin Li if child not in child_pool and child not in cache: 289*760c253cSXin Li child_pool.add(child) 290*760c253cSXin Li if len(child_pool) >= target_len: 291*760c253cSXin Li break 292*760c253cSXin Li else: 293*760c253cSXin Li num_trials += 1 294*760c253cSXin Li 295*760c253cSXin Li # If NUM_TRIALS of tries have been attempted without generating a set of new 296*760c253cSXin Li # tasks, the algorithm stops. 297*760c253cSXin Li if num_trials >= GAGeneration.NUM_TRIALS: 298*760c253cSXin Li return [] 299*760c253cSXin Li 300*760c253cSXin Li assert len(child_pool) == target_len 301*760c253cSXin Li 302*760c253cSXin Li return [ 303*760c253cSXin Li GAGeneration(child_pool, set(retained_tasks), self._total_stucks) 304*760c253cSXin Li ] 305