Logo ROOT  
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TMVA::GeneticAlgorithm *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * *
20 * Redistribution and use in source and binary forms, with or without *
21 * modification, are permitted according to the terms listed in LICENSE *
22 * (http://tmva.sourceforge.net/LICENSE) *
23 **********************************************************************************/
24
25/*! \class TMVA::GeneticAlgorithm
26\ingroup TMVA
27
28Base definition for genetic algorithm
29
30*/
31
32#include <iostream>
33#include <algorithm>
34#include <float.h>
35
36#ifdef _GLIBCXX_PARALLEL
37#include <omp.h>
38#endif
39
41#include "TMVA/Interval.h"
42#include "TMVA/IFitterTarget.h"
43#include "TMVA/MsgLogger.h"
44#include "TMVA/Types.h"
45
46#include "RtypesCore.h"
47#include "Rtypes.h"
48#include "TMath.h"
49
50namespace TMVA {
51 const Bool_t GeneticAlgorithm__DEBUG__ = kFALSE;
52}
53
55
56////////////////////////////////////////////////////////////////////////////////
57/// Constructor
58///
59/// Parameters:
60///
61/// - int populationSize : defines the number of "Individuals" which are created and tested
62/// within one Generation (Iteration of the Evolution)
63/// - std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
64/// gets the low and GetMax gets the high constraint of the variable
65/// the size of "ranges" is the number of coefficients which are optimised
66/// Purpose:
67///
68/// Creates a random population with individuals of the size ranges.size()
69
71 const std::vector<Interval*>& ranges, UInt_t seed )
72: fConvCounter(-1),
73 fFitterTarget( target ),
74 fConvValue(0.),
75 fLastResult(DBL_MAX),
76 fSpread(0.1),
77 fMirror(kTRUE),
78 fFirstTime(kTRUE),
79 fMakeCopies(kFALSE),
80 fPopulationSize(populationSize),
81 fRanges( ranges ),
82 fPopulation(ranges, populationSize, seed),
83 fBestFitness(DBL_MAX),
84 fLogger( new MsgLogger("GeneticAlgorithm") )
85{
87}
88
90{
91 // destructor; deletes fLogger
92 delete fLogger;
93}
94
95
96////////////////////////////////////////////////////////////////////////////////
97/// calls evolution, but if it is not the first time.
98/// If it's the first time, the random population created by the
99/// constructor is still not evaluated, .. therefore we wait for the
100/// second time init is called.
101
103{
104 if ( fFirstTime ) fFirstTime = kFALSE;
105 else {
106 Evolution();
107 }
108}
109
110////////////////////////////////////////////////////////////////////////////////
111/// if the "fitnessFunction" is called multiple times for one set of
112/// factors (because i.e. each event of a TTree has to be assessed with
113/// each set of Factors proposed by the Genetic Algorithm) the value
114/// of the current calculation has to be added(? or else) to the value
115/// obtained up to now.
116/// example: some chi-square is calculated for every event,
117/// after every event the new chi-square (newValue) has to be simply
118/// added to the oldValue.
119///
120/// this function has to be overridden eventually
121/// it might contain only the following return statement.
122/// return oldValue + newValue;
123
125{
126 return newValue;
127}
128
129////////////////////////////////////////////////////////////////////////////////
130/// starts the evaluation of the fitness of all different individuals of
131/// the population.
132///
133/// this function calls implicitly (many times) the "fitnessFunction" which
134/// has been overridden by the user.
135
137{
138 fBestFitness = DBL_MAX;
139#ifdef _GLIBCXX_PARALLEL
140
141 const int nt = omp_get_num_threads();
142 Double_t bests[nt];
143 for ( int i =0; i < nt; ++i )
144 bests[i] = fBestFitness;
145
146#pragma omp parallel
147 {
148 int thread_number = omp_get_thread_num();
149#pragma omp for
150 for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
151 {
152 GeneticGenes* genes = fPopulation.GetGenes(index);
153 Double_t fitness = NewFitness( genes->GetFitness(),
154 fFitterTarget.EstimatorFunction(genes->GetFactors()) );
155 genes->SetFitness( fitness );
156
157 if ( bests[thread_number] > fitness )
158 bests[thread_number] = fitness;
159 }
160 }
161
162 fBestFitness = *std::min_element(bests, bests+nt);
163
164#else
165
166 for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
167 GeneticGenes* genes = fPopulation.GetGenes(index);
168 Double_t fitness = NewFitness( genes->GetFitness(),
169 fFitterTarget.EstimatorFunction(genes->GetFactors()) );
170 genes->SetFitness( fitness );
171
172 if ( fBestFitness > fitness )
173 fBestFitness = fitness;
174
175 }
176
177#endif
178
179 fPopulation.Sort();
180
181 return fBestFitness;
182}
183
184////////////////////////////////////////////////////////////////////////////////
185/// this function is called from "init" and controls the evolution of the
186/// individuals.
187///
188/// The function can be overridden to change the parameters for mutation rate
189/// sexual reproduction and so on.
190
192{
193 if ( fMakeCopies )
194 fPopulation.MakeCopies( 5 );
195 fPopulation.MakeChildren();
196
197 fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
198 fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
199}
200
201////////////////////////////////////////////////////////////////////////////////
202/// this function provides the ability to change the stepSize of a mutation according to
203/// the success of the last generations.
204///
205/// Parameters:
206///
207/// - int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
208/// - int successSteps : >sucessSteps Generations could improve the result
209/// - double factor : than multiply the stepSize ( spread ) by this factor
210///
211/// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
212/// is divided by the factor)
213///
214/// using this function one can increase the stepSize of the mutation when we have
215/// good success (to pass fast through the easy phase-space) and reduce the stepSize
216/// if we are in a difficult "territory" of the phase-space.
217
219{
220 // < is valid for "less" comparison
221 if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
222 fLastResult = fBestFitness;
223 fSuccessList.push_front( 1 ); // it got better
224 }
225 else {
226 fSuccessList.push_front( 0 ); // it stayed the same
227 }
228 Int_t n = 0;
229 Int_t sum = 0;
230 std::deque<Int_t>::iterator vec = fSuccessList.begin();
231 for (; vec != fSuccessList.end() ; ++vec) {
232 sum += *vec;
233 n++;
234 }
235
236 if ( n >= ofSteps ) {
237 fSuccessList.pop_back();
238 if ( sum > successSteps ) { // too much success
239 fSpread /= factor;
240 if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
241 }
242 else if ( sum == successSteps ) { // on the optimal path
243 if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
244 }
245 else { // not very successful
246 fSpread *= factor;
247 if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
248 }
249 }
250
251 return fSpread;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255/// gives back true if the last "steps" steps have lead to an improvement of the
256/// "fitness" of the "individuals" of at least "improvement"
257///
258/// this gives a simple measure of if the fitness of the individuals is
259/// converging and no major improvement is to be expected soon.
260
262{
263 if (fConvCounter < 0) {
264 fConvValue = fBestFitness;
265 }
266 if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
267 fConvCounter ++;
268 }
269 else {
270 fConvCounter = 0;
271 fConvValue = fBestFitness;
272 }
273 if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
274 if (fConvCounter < steps) return kFALSE;
275 return kTRUE;
276}
const Bool_t kFALSE
Definition: RtypesCore.h:90
bool Bool_t
Definition: RtypesCore.h:61
double Double_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
Base definition for genetic algorithm.
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
GeneticPopulation fPopulation
void Init()
calls evolution, but if it is not the first time.
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
GeneticAlgorithm(IFitterTarget &target, Int_t populationSize, const std::vector< TMVA::Interval * > &ranges, UInt_t seed=0)
Constructor.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i....
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:51
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
Double_t GetFitness() const
Definition: GeneticGenes.h:52
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
const Int_t n
Definition: legend1.C:16
create variable transformations
Double_t Log(Double_t x)
Definition: TMath.h:750
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
static long int sum(long int i)
Definition: Factory.cxx:2275