Logo ROOT   6.07/09
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticAlgorithm *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 //_______________________________________________________________________
26 //
27 // Base definition for genetic algorithm
28 //_______________________________________________________________________
29 
30 #include <iostream>
31 #include <algorithm>
32 #include <float.h>
33 
34 #ifdef _GLIBCXX_PARALLEL
35 #include <omp.h>
36 #endif
37 
38 #include "TMVA/GeneticAlgorithm.h"
39 #include "TMVA/Interval.h"
40 #include "TMVA/IFitterTarget.h"
41 #include "TMVA/MsgLogger.h"
42 #include "TMVA/Types.h"
43 
44 #include "RtypesCore.h"
45 #include "Rtypes.h"
46 #include "TMath.h"
47 
48 namespace TMVA {
50 }
51 
53 
54 ////////////////////////////////////////////////////////////////////////////////
55 /// Constructor
56 /// Parameters:
57 /// int populationSize : defines the number of "Individuals" which are created and tested
58 /// within one Generation (Iteration of the Evolution)
59 /// std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
60 /// gets the low and GetMax gets the high constraint of the variable
61 /// the size of "ranges" is the number of coefficients which are optimised
62 /// Purpose:
63 /// Creates a random population with individuals of the size ranges.size()
64 
66  const std::vector<Interval*>& ranges, UInt_t seed )
67 : fConvCounter(-1),
68  fFitterTarget( target ),
69  fConvValue(0.),
70  fLastResult(DBL_MAX),
71  fSpread(0.1),
72  fMirror(kTRUE),
73  fFirstTime(kTRUE),
74  fMakeCopies(kFALSE),
75  fPopulationSize(populationSize),
76  fRanges( ranges ),
77  fPopulation(ranges, populationSize, seed),
78  fBestFitness(DBL_MAX),
79  fLogger( new MsgLogger("GeneticAlgorithm") )
80 {
81  fPopulation.SetRandomSeed( seed );
82 }
83 
85 {
86  // destructor; deletes fLogger
87  delete fLogger;
88 }
89 
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// calls evolution, but if it is not the first time.
93 /// If it's the first time, the random population created by the
94 /// constructor is still not evaluated, .. therefore we wait for the
95 /// second time init is called.
96 
98 {
99  if ( fFirstTime ) fFirstTime = kFALSE;
100  else {
101  Evolution();
102  }
103 }
104 
105 ////////////////////////////////////////////////////////////////////////////////
106 /// if the "fitnessFunction" is called multiple times for one set of
107 /// factors (because i.e. each event of a TTree has to be assessed with
108 /// each set of Factors proposed by the Genetic Algorithm) the value
109 /// of the current calculation has to be added(? or else) to the value
110 /// obtained up to now.
111 /// example: some chi-square is calculated for every event,
112 /// after every event the new chi-square (newValue) has to be simply
113 /// added to the oldValue.
114 ///
115 /// this function has to be overridden eventually
116 /// it might contain only the following return statement.
117 /// return oldValue + newValue;
118 
120 {
121  return newValue;
122 }
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 /// starts the evaluation of the fitness of all different individuals of
126 /// the population.
127 ///
128 /// this function calls implicitly (many times) the "fitnessFunction" which
129 /// has been overridden by the user.
130 
132 {
133  fBestFitness = DBL_MAX;
134 #ifdef _GLIBCXX_PARALLEL
135 
136  const int nt = omp_get_num_threads();
137  Double_t bests[nt];
138  for ( int i =0; i < nt; ++i )
139  bests[i] = fBestFitness;
140 
141 #pragma omp parallel
142  {
143  int thread_number = omp_get_thread_num();
144 #pragma omp for
145  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
146  {
147  GeneticGenes* genes = fPopulation.GetGenes(index);
148  Double_t fitness = NewFitness( genes->GetFitness(),
149  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
150  genes->SetFitness( fitness );
151 
152  if ( bests[thread_number] > fitness )
153  bests[thread_number] = fitness;
154  }
155  }
156 
157  fBestFitness = *std::min_element(bests, bests+nt);
158 
159 #else
160 
161  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
162  GeneticGenes* genes = fPopulation.GetGenes(index);
163  Double_t fitness = NewFitness( genes->GetFitness(),
164  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
165  genes->SetFitness( fitness );
166 
167  if ( fBestFitness > fitness )
168  fBestFitness = fitness;
169 
170  }
171 
172 #endif
173 
174  fPopulation.Sort();
175 
176  return fBestFitness;
177 }
178 
179 ////////////////////////////////////////////////////////////////////////////////
180 /// this function is called from "init" and controls the evolution of the
181 /// individuals.
182 /// the function can be overridden to change the parameters for mutation rate
183 /// sexual reproduction and so on.
184 
186 {
187  if ( fMakeCopies )
188  fPopulation.MakeCopies( 5 );
189  fPopulation.MakeChildren();
190 
191  fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
192  fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// this function provides the ability to change the stepSize of a mutation according to
197 /// the success of the last generations.
198 ///
199 /// Parameters:
200 /// int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
201 /// int successSteps : >sucessSteps Generations could improve the result
202 /// double factor : than multiply the stepSize ( spread ) by this factor
203 /// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
204 /// is divided by the factor)
205 ///
206 /// using this function one can increase the stepSize of the mutation when we have
207 /// good success (to pass fast through the easy phase-space) and reduce the stepSize
208 /// if we are in a difficult "territory" of the phase-space.
209 ///
210 
212 {
213  // < is valid for "less" comparison
214  if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
215  fLastResult = fBestFitness;
216  fSuccessList.push_front( 1 ); // it got better
217  }
218  else {
219  fSuccessList.push_front( 0 ); // it stayed the same
220  }
221  Int_t n = 0;
222  Int_t sum = 0;
223  std::deque<Int_t>::iterator vec = fSuccessList.begin();
224  for (; vec != fSuccessList.end() ; vec++) {
225  sum += *vec;
226  n++;
227  }
228 
229  if ( n >= ofSteps ) {
230  fSuccessList.pop_back();
231  if ( sum > successSteps ) { // too much success
232  fSpread /= factor;
233  if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
234  }
235  else if ( sum == successSteps ) { // on the optimal path
236  if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
237  }
238  else { // not very successful
239  fSpread *= factor;
240  if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
241  }
242  }
243 
244  return fSpread;
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// gives back true if the last "steps" steps have lead to an improvement of the
249 /// "fitness" of the "individuals" of at least "improvement"
250 ///
251 /// this gives a simple measure of if the fitness of the individuals is
252 /// converging and no major improvement is to be expected soon.
253 ///
254 
256 {
257  if (fConvCounter < 0) {
258  fConvValue = fBestFitness;
259  }
260  if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
261  fConvCounter ++;
262  }
263  else {
264  fConvCounter = 0;
265  fConvValue = fBestFitness;
266  }
267  if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
268  if (fConvCounter < steps) return kFALSE;
269  return kTRUE;
270 }
static long int sum(long int i)
Definition: Factory.cxx:1785
Double_t Log(Double_t x)
Definition: TMath.h:526
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
STL namespace.
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:53
unsigned int UInt_t
Definition: RtypesCore.h:42
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
Double_t GetFitness() const
Definition: GeneticGenes.h:54
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:51
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
Abstract ClassifierFactory template that handles arbitrary types.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i...
const Bool_t kTRUE
Definition: Rtypes.h:91
void Init()
calls evolution, but if it is not the first time.
const Int_t n
Definition: legend1.C:16
const Bool_t GeneticAlgorithm__DEBUG__