Logo ROOT   6.10/09
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticAlgorithm *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 /*! \class TMVA::GeneticAlgorithm
26 \ingroup TMVA
27 
28 Base definition for genetic algorithm
29 
30 */
31 
32 #include <iostream>
33 #include <algorithm>
34 #include <float.h>
35 
36 #ifdef _GLIBCXX_PARALLEL
37 #include <omp.h>
38 #endif
39 
40 #include "TMVA/GeneticAlgorithm.h"
41 #include "TMVA/Interval.h"
42 #include "TMVA/IFitterTarget.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/Types.h"
45 
46 #include "RtypesCore.h"
47 #include "Rtypes.h"
48 #include "TMath.h"
49 
50 namespace TMVA {
51  const Bool_t GeneticAlgorithm__DEBUG__ = kFALSE;
52 }
53 
55 
56 ////////////////////////////////////////////////////////////////////////////////
57 /// Constructor
58 ///
59 /// Parameters:
60 ///
61 /// - int populationSize : defines the number of "Individuals" which are created and tested
62 /// within one Generation (Iteration of the Evolution)
63 /// - std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
64 /// gets the low and GetMax gets the high constraint of the variable
65 /// the size of "ranges" is the number of coefficients which are optimised
66 /// Purpose:
67 ///
68 /// Creates a random population with individuals of the size ranges.size()
69 
71  const std::vector<Interval*>& ranges, UInt_t seed )
72 : fConvCounter(-1),
73  fFitterTarget( target ),
74  fConvValue(0.),
75  fLastResult(DBL_MAX),
76  fSpread(0.1),
77  fMirror(kTRUE),
78  fFirstTime(kTRUE),
79  fMakeCopies(kFALSE),
80  fPopulationSize(populationSize),
81  fRanges( ranges ),
82  fPopulation(ranges, populationSize, seed),
83  fBestFitness(DBL_MAX),
84  fLogger( new MsgLogger("GeneticAlgorithm") )
85 {
86  fPopulation.SetRandomSeed( seed );
87 }
88 
90 {
91  // destructor; deletes fLogger
92  delete fLogger;
93 }
94 
95 
96 ////////////////////////////////////////////////////////////////////////////////
97 /// calls evolution, but if it is not the first time.
98 /// If it's the first time, the random population created by the
99 /// constructor is still not evaluated, .. therefore we wait for the
100 /// second time init is called.
101 
103 {
104  if ( fFirstTime ) fFirstTime = kFALSE;
105  else {
106  Evolution();
107  }
108 }
109 
110 ////////////////////////////////////////////////////////////////////////////////
111 /// if the "fitnessFunction" is called multiple times for one set of
112 /// factors (because i.e. each event of a TTree has to be assessed with
113 /// each set of Factors proposed by the Genetic Algorithm) the value
114 /// of the current calculation has to be added(? or else) to the value
115 /// obtained up to now.
116 /// example: some chi-square is calculated for every event,
117 /// after every event the new chi-square (newValue) has to be simply
118 /// added to the oldValue.
119 ///
120 /// this function has to be overridden eventually
121 /// it might contain only the following return statement.
122 /// return oldValue + newValue;
123 
125 {
126  return newValue;
127 }
128 
129 ////////////////////////////////////////////////////////////////////////////////
130 /// starts the evaluation of the fitness of all different individuals of
131 /// the population.
132 ///
133 /// this function calls implicitly (many times) the "fitnessFunction" which
134 /// has been overridden by the user.
135 
137 {
138  fBestFitness = DBL_MAX;
139 #ifdef _GLIBCXX_PARALLEL
140 
141  const int nt = omp_get_num_threads();
142  Double_t bests[nt];
143  for ( int i =0; i < nt; ++i )
144  bests[i] = fBestFitness;
145 
146 #pragma omp parallel
147  {
148  int thread_number = omp_get_thread_num();
149 #pragma omp for
150  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
151  {
152  GeneticGenes* genes = fPopulation.GetGenes(index);
153  Double_t fitness = NewFitness( genes->GetFitness(),
154  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
155  genes->SetFitness( fitness );
156 
157  if ( bests[thread_number] > fitness )
158  bests[thread_number] = fitness;
159  }
160  }
161 
162  fBestFitness = *std::min_element(bests, bests+nt);
163 
164 #else
165 
166  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
167  GeneticGenes* genes = fPopulation.GetGenes(index);
168  Double_t fitness = NewFitness( genes->GetFitness(),
169  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
170  genes->SetFitness( fitness );
171 
172  if ( fBestFitness > fitness )
173  fBestFitness = fitness;
174 
175  }
176 
177 #endif
178 
179  fPopulation.Sort();
180 
181  return fBestFitness;
182 }
183 
184 ////////////////////////////////////////////////////////////////////////////////
185 /// this function is called from "init" and controls the evolution of the
186 /// individuals.
187 ///
188 /// The function can be overridden to change the parameters for mutation rate
189 /// sexual reproduction and so on.
190 
192 {
193  if ( fMakeCopies )
194  fPopulation.MakeCopies( 5 );
195  fPopulation.MakeChildren();
196 
197  fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
198  fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
199 }
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// this function provides the ability to change the stepSize of a mutation according to
203 /// the success of the last generations.
204 ///
205 /// Parameters:
206 ///
207 /// - int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
208 /// - int successSteps : >sucessSteps Generations could improve the result
209 /// - double factor : than multiply the stepSize ( spread ) by this factor
210 ///
211 /// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
212 /// is divided by the factor)
213 ///
214 /// using this function one can increase the stepSize of the mutation when we have
215 /// good success (to pass fast through the easy phase-space) and reduce the stepSize
216 /// if we are in a difficult "territory" of the phase-space.
217 
219 {
220  // < is valid for "less" comparison
221  if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
222  fLastResult = fBestFitness;
223  fSuccessList.push_front( 1 ); // it got better
224  }
225  else {
226  fSuccessList.push_front( 0 ); // it stayed the same
227  }
228  Int_t n = 0;
229  Int_t sum = 0;
230  std::deque<Int_t>::iterator vec = fSuccessList.begin();
231  for (; vec != fSuccessList.end() ; vec++) {
232  sum += *vec;
233  n++;
234  }
235 
236  if ( n >= ofSteps ) {
237  fSuccessList.pop_back();
238  if ( sum > successSteps ) { // too much success
239  fSpread /= factor;
240  if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
241  }
242  else if ( sum == successSteps ) { // on the optimal path
243  if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
244  }
245  else { // not very successful
246  fSpread *= factor;
247  if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
248  }
249  }
250 
251  return fSpread;
252 }
253 
254 ////////////////////////////////////////////////////////////////////////////////
255 /// gives back true if the last "steps" steps have lead to an improvement of the
256 /// "fitness" of the "individuals" of at least "improvement"
257 ///
258 /// this gives a simple measure of if the fitness of the individuals is
259 /// converging and no major improvement is to be expected soon.
260 
262 {
263  if (fConvCounter < 0) {
264  fConvValue = fBestFitness;
265  }
266  if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
267  fConvCounter ++;
268  }
269  else {
270  fConvCounter = 0;
271  fConvValue = fBestFitness;
272  }
273  if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
274  if (fConvCounter < steps) return kFALSE;
275  return kTRUE;
276 }
static long int sum(long int i)
Definition: Factory.cxx:2162
Double_t Log(Double_t x)
Definition: TMath.h:649
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:51
The TMVA::Interval Class.
Definition: Interval.h:61
unsigned int UInt_t
Definition: RtypesCore.h:42
Base definition for genetic algorithm.
Double_t GetFitness() const
Definition: GeneticGenes.h:52
const Bool_t kFALSE
Definition: RtypesCore.h:92
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
Abstract ClassifierFactory template that handles arbitrary types.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i...
Interface for a fitter &#39;target&#39;.
Definition: IFitterTarget.h:44
const Bool_t kTRUE
Definition: RtypesCore.h:91
void Init()
calls evolution, but if it is not the first time.
const Int_t n
Definition: legend1.C:16