Logo ROOT  
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticAlgorithm *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 /*! \class TMVA::GeneticAlgorithm
26 \ingroup TMVA
27 
28 Base definition for genetic algorithm
29 
30 */
31 
32 #include <algorithm>
33 #include <cfloat>
34 
35 #ifdef _GLIBCXX_PARALLEL
36 #include <omp.h>
37 #endif
38 
39 #include "TMVA/GeneticAlgorithm.h"
40 #include "TMVA/Interval.h"
41 #include "TMVA/IFitterTarget.h"
42 #include "TMVA/MsgLogger.h"
43 #include "TMVA/Types.h"
44 
45 #include "RtypesCore.h"
46 #include "Rtypes.h"
47 #include "TMath.h"
48 
49 namespace TMVA {
50  const Bool_t GeneticAlgorithm__DEBUG__ = kFALSE;
51 }
52 
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// Constructor
57 ///
58 /// Parameters:
59 ///
60 /// - int populationSize : defines the number of "Individuals" which are created and tested
61 /// within one Generation (Iteration of the Evolution)
62 /// - std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
63 /// gets the low and GetMax gets the high constraint of the variable
64 /// the size of "ranges" is the number of coefficients which are optimised
65 /// Purpose:
66 ///
67 /// Creates a random population with individuals of the size ranges.size()
68 
70  const std::vector<Interval*>& ranges, UInt_t seed )
71 : fConvCounter(-1),
72  fFitterTarget( target ),
73  fConvValue(0.),
74  fLastResult(DBL_MAX),
75  fSpread(0.1),
76  fMirror(kTRUE),
77  fFirstTime(kTRUE),
78  fMakeCopies(kFALSE),
79  fPopulationSize(populationSize),
80  fRanges( ranges ),
81  fPopulation(ranges, populationSize, seed),
82  fBestFitness(DBL_MAX),
83  fLogger( new MsgLogger("GeneticAlgorithm") )
84 {
85  fPopulation.SetRandomSeed( seed );
86 }
87 
89 {
90  // destructor; deletes fLogger
91  delete fLogger;
92 }
93 
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 /// calls evolution, but if it is not the first time.
97 /// If it's the first time, the random population created by the
98 /// constructor is still not evaluated, .. therefore we wait for the
99 /// second time init is called.
100 
102 {
103  if ( fFirstTime ) fFirstTime = kFALSE;
104  else {
105  Evolution();
106  }
107 }
108 
109 ////////////////////////////////////////////////////////////////////////////////
110 /// if the "fitnessFunction" is called multiple times for one set of
111 /// factors (because i.e. each event of a TTree has to be assessed with
112 /// each set of Factors proposed by the Genetic Algorithm) the value
113 /// of the current calculation has to be added(? or else) to the value
114 /// obtained up to now.
115 /// example: some chi-square is calculated for every event,
116 /// after every event the new chi-square (newValue) has to be simply
117 /// added to the oldValue.
118 ///
119 /// this function has to be overridden eventually
120 /// it might contain only the following return statement.
121 /// return oldValue + newValue;
122 
124 {
125  return newValue;
126 }
127 
128 ////////////////////////////////////////////////////////////////////////////////
129 /// starts the evaluation of the fitness of all different individuals of
130 /// the population.
131 ///
132 /// this function calls implicitly (many times) the "fitnessFunction" which
133 /// has been overridden by the user.
134 
136 {
137  fBestFitness = DBL_MAX;
138 #ifdef _GLIBCXX_PARALLEL
139 
140  const int nt = omp_get_num_threads();
141  Double_t bests[nt];
142  for ( int i =0; i < nt; ++i )
143  bests[i] = fBestFitness;
144 
145 #pragma omp parallel
146  {
147  int thread_number = omp_get_thread_num();
148 #pragma omp for
149  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
150  {
151  GeneticGenes* genes = fPopulation.GetGenes(index);
152  Double_t fitness = NewFitness( genes->GetFitness(),
153  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
154  genes->SetFitness( fitness );
155 
156  if ( bests[thread_number] > fitness )
157  bests[thread_number] = fitness;
158  }
159  }
160 
161  fBestFitness = *std::min_element(bests, bests+nt);
162 
163 #else
164 
165  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
166  GeneticGenes* genes = fPopulation.GetGenes(index);
167  Double_t fitness = NewFitness( genes->GetFitness(),
168  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
169  genes->SetFitness( fitness );
170 
171  if ( fBestFitness > fitness )
172  fBestFitness = fitness;
173 
174  }
175 
176 #endif
177 
178  fPopulation.Sort();
179 
180  return fBestFitness;
181 }
182 
183 ////////////////////////////////////////////////////////////////////////////////
184 /// this function is called from "init" and controls the evolution of the
185 /// individuals.
186 ///
187 /// The function can be overridden to change the parameters for mutation rate
188 /// sexual reproduction and so on.
189 
191 {
192  if ( fMakeCopies )
193  fPopulation.MakeCopies( 5 );
194  fPopulation.MakeChildren();
195 
196  fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
197  fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
198 }
199 
200 ////////////////////////////////////////////////////////////////////////////////
201 /// this function provides the ability to change the stepSize of a mutation according to
202 /// the success of the last generations.
203 ///
204 /// Parameters:
205 ///
206 /// - int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
207 /// - int successSteps : >sucessSteps Generations could improve the result
208 /// - double factor : than multiply the stepSize ( spread ) by this factor
209 ///
210 /// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
211 /// is divided by the factor)
212 ///
213 /// using this function one can increase the stepSize of the mutation when we have
214 /// good success (to pass fast through the easy phase-space) and reduce the stepSize
215 /// if we are in a difficult "territory" of the phase-space.
216 
218 {
219  // < is valid for "less" comparison
220  if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
221  fLastResult = fBestFitness;
222  fSuccessList.push_front( 1 ); // it got better
223  }
224  else {
225  fSuccessList.push_front( 0 ); // it stayed the same
226  }
227  Int_t n = 0;
228  Int_t sum = 0;
229  std::deque<Int_t>::iterator vec = fSuccessList.begin();
230  for (; vec != fSuccessList.end() ; ++vec) {
231  sum += *vec;
232  n++;
233  }
234 
235  if ( n >= ofSteps ) {
236  fSuccessList.pop_back();
237  if ( sum > successSteps ) { // too much success
238  fSpread /= factor;
239  if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
240  }
241  else if ( sum == successSteps ) { // on the optimal path
242  if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
243  }
244  else { // not very successful
245  fSpread *= factor;
246  if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
247  }
248  }
249 
250  return fSpread;
251 }
252 
253 ////////////////////////////////////////////////////////////////////////////////
254 /// gives back true if the last "steps" steps have lead to an improvement of the
255 /// "fitness" of the "individuals" of at least "improvement"
256 ///
257 /// this gives a simple measure of if the fitness of the individuals is
258 /// converging and no major improvement is to be expected soon.
259 
261 {
262  if (fConvCounter < 0) {
263  fConvValue = fBestFitness;
264  }
265  if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
266  fConvCounter ++;
267  }
268  else {
269  fConvCounter = 0;
270  fConvValue = fBestFitness;
271  }
272  if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
273  if (fConvCounter < steps) return kFALSE;
274  return kTRUE;
275 }
n
const Int_t n
Definition: legend1.C:16
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TMVA::GeneticAlgorithm::SpreadControl
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
Definition: GeneticAlgorithm.cxx:217
TMVA::GeneticPopulation::SetRandomSeed
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
Definition: GeneticPopulation.cxx:93
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
TMVA::GeneticAlgorithm::GeneticAlgorithm
GeneticAlgorithm(IFitterTarget &target, Int_t populationSize, const std::vector< TMVA::Interval * > &ranges, UInt_t seed=0)
Constructor.
Definition: GeneticAlgorithm.cxx:69
TMVA::GeneticAlgorithm::NewFitness
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i....
Definition: GeneticAlgorithm.cxx:123
sum
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMVA::GeneticAlgorithm
Base definition for genetic algorithm.
Definition: GeneticAlgorithm.h:50
TMath::Abs
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
TMVA::GeneticAlgorithm::Init
void Init()
calls evolution, but if it is not the first time.
Definition: GeneticAlgorithm.cxx:101
Bool_t
bool Bool_t
Definition: RtypesCore.h:63
bool
TMVA::GeneticGenes::GetFitness
Double_t GetFitness() const
Definition: GeneticGenes.h:52
MsgLogger.h
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::GeneticGenes::SetFitness
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:51
TMVA::GeneticGenes
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
TMVA::GeneticAlgorithm::HasConverged
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
Definition: GeneticAlgorithm.cxx:260
Types.h
TMVA::GeneticGenes::GetFactors
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
unsigned int
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::GeneticAlgorithm::CalculateFitness
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
Definition: GeneticAlgorithm.cxx:135
IFitterTarget.h
RtypesCore.h
TMVA::MsgLogger
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
TMVA::GeneticAlgorithm::Evolution
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
Definition: GeneticAlgorithm.cxx:190
GeneticAlgorithm.h
TMVA::IFitterTarget
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Rtypes.h
TMVA::GeneticAlgorithm::~GeneticAlgorithm
virtual ~GeneticAlgorithm()
Definition: GeneticAlgorithm.cxx:88
TMath.h
Interval.h
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
TMVA::GeneticAlgorithm::fPopulation
GeneticPopulation fPopulation
Definition: GeneticAlgorithm.h:98
int