Logo ROOT  
Reference Guide
GeneticPopulation.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TMVA::GeneticPopulation *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * *
20 * Redistribution and use in source and binary forms, with or without *
21 * modification, are permitted according to the terms listed in LICENSE *
22 * (http://tmva.sourceforge.net/LICENSE) *
23 **********************************************************************************/
24
25/*! \class TMVA::GeneticPopulation
26\ingroup TMVA
27
28Population definition for genetic algorithm.
29
30*/
31
32#include <iostream>
33#include <iomanip>
34
35#include "Rstrstream.h"
36#include "TSystem.h"
37#include "TRandom3.h"
38#include "TH1.h"
39#include <algorithm>
40
42#include "TMVA/GeneticGenes.h"
43#include "TMVA/MsgLogger.h"
44
46
47using namespace std;
48
49////////////////////////////////////////////////////////////////////////////////
50/// Constructor
51
52TMVA::GeneticPopulation::GeneticPopulation(const std::vector<Interval*>& ranges, Int_t size, UInt_t seed)
53 : fGenePool(size),
54 fRanges(ranges.size()),
55 fLogger( new MsgLogger("GeneticPopulation") )
56{
57 // create a randomGenerator for this population and set a seed
58 // create the genePools
59 //
60 fRandomGenerator = new TRandom3( 100 ); //please check
63
64 for ( unsigned int i = 0; i < ranges.size(); ++i )
65 fRanges[i] = new TMVA::GeneticRange( fRandomGenerator, ranges[i] );
66
67 vector<Double_t> newEntry( fRanges.size() );
68 for ( int i = 0; i < size; ++i )
69 {
70 for ( unsigned int rIt = 0; rIt < fRanges.size(); ++rIt )
71 newEntry[rIt] = fRanges[rIt]->Random();
72 fGenePool[i] = TMVA::GeneticGenes( newEntry);
73 }
74
76}
77
78////////////////////////////////////////////////////////////////////////////////
79/// destructor
80
82{
83 if (fRandomGenerator != NULL) delete fRandomGenerator;
84
85 std::vector<GeneticRange*>::iterator it = fRanges.begin();
86 for (;it!=fRanges.end(); ++it) delete *it;
87
88 delete fLogger;
89}
90
91
92
93////////////////////////////////////////////////////////////////////////////////
94/// the random seed of the random generator
95
97{
98 fRandomGenerator->SetSeed( seed );
99}
100
101////////////////////////////////////////////////////////////////////////////////
102/// Produces offspring which is are copies of their parents.
103///
104/// Parameters:
105/// - int number : the number of the last individual to be copied
106
108{
109 int i=0;
110 for (std::vector<TMVA::GeneticGenes>::iterator it = fGenePool.begin();
111 it != fGenePool.end() && i < number;
112 ++it, ++i ) {
113 GiveHint( it->GetFactors(), it->GetFitness() );
114 }
115}
116
117////////////////////////////////////////////////////////////////////////////////
118/// Creates children out of members of the current generation.
119///
120/// Children have a combination of the coefficients of their parents
121
123{
124#ifdef _GLIBCXX_PARALLEL
125#pragma omp parallel
126#pragma omp for
127#endif
128 for ( int it = 0; it < (int) (fGenePool.size() / 2); ++it )
129 {
130 Int_t pos = (Int_t)fRandomGenerator->Integer( fGenePool.size()/2 );
131 fGenePool[(fGenePool.size() / 2) + it] = MakeSex( fGenePool[it], fGenePool[pos] );
132 }
133}
134
135////////////////////////////////////////////////////////////////////////////////
136/// this function takes two individuals and produces offspring by mixing
137/// (recombining) their coefficients.
138
140 TMVA::GeneticGenes female )
141{
142 vector< Double_t > child(fRanges.size());
143 for (unsigned int i = 0; i < fRanges.size(); ++i) {
144 if (fRandomGenerator->Integer( 2 ) == 0) {
145 child[i] = male.GetFactors()[i];
146 }else{
147 child[i] = female.GetFactors()[i];
148 }
149 }
150 return TMVA::GeneticGenes( child );
151}
152
153////////////////////////////////////////////////////////////////////////////////
154/// Mutates the individuals in the genePool.
155///
156/// Parameters:
157///
158/// - double probability : gives the probability (in percent) of a mutation of a coefficient
159/// - int startIndex : leaves unchanged (without mutation) the individuals which are better ranked
160/// than indicated by "startIndex". This means: if "startIndex==3", the first (and best)
161/// three individuals are not mutated. This allows to preserve the best result of the
162/// current Generation for the next generation.
163/// - Bool_t near : if true, the mutation will produce a new coefficient which is "near" the old one
164/// (gaussian around the current value)
165/// - double spread : if near==true, spread gives the sigma of the gaussian
166/// - Bool_t mirror : if the new value obtained would be outside of the given constraints
167/// the value is mapped between the constraints again. This can be done either
168/// by a kind of periodic boundary conditions or mirrored at the boundary.
169/// (mirror = true seems more "natural")
170
171void TMVA::GeneticPopulation::Mutate( Double_t probability , Int_t startIndex,
172 Bool_t near, Double_t spread, Bool_t mirror )
173{
174 vector< Double_t>::iterator vec;
175 vector< TMVA::GeneticRange* >::iterator vecRange;
176
177 //#ifdef _GLIBCXX_PARALLEL
178 // #pragma omp parallel
179 // #pragma omp for
180 //#endif
181 // The range methods are not thread safe!
182 for (int it = startIndex; it < (int) fGenePool.size(); ++it) {
183 vecRange = fRanges.begin();
184 for (vec = (fGenePool[it].GetFactors()).begin(); vec < (fGenePool[it].GetFactors()).end(); ++vec) {
185 if (fRandomGenerator->Uniform( 100 ) <= probability) {
186 (*vec) = (*vecRange)->Random( near, (*vec), spread, mirror );
187 }
188 ++vecRange;
189 }
190 }
191}
192
193
194////////////////////////////////////////////////////////////////////////////////
195/// gives back the "Genes" of the population with the given index.
196
198{
199 return &(fGenePool[index]);
200}
201
202////////////////////////////////////////////////////////////////////////////////
203/// make a little printout of the individuals up to index "untilIndex"
204/// this means, .. write out the best "untilIndex" individuals.
205
207{
208 for ( unsigned int it = 0; it < fGenePool.size(); ++it )
209 {
210 Int_t n=0;
211 if (untilIndex >= -1 ) {
212 if (untilIndex == -1 ) return;
213 untilIndex--;
214 }
215 Log() << "fitness: " << fGenePool[it].GetFitness() << " ";
216 for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
217 vec < fGenePool[it].GetFactors().end(); ++vec ) {
218 Log() << "f_" << n++ << ": " << (*vec) << " ";
219 }
220 Log() << Endl;
221 }
222}
223
224////////////////////////////////////////////////////////////////////////////////
225/// make a little printout to the stream "out" of the individuals up to index "untilIndex"
226/// this means, .. write out the best "untilIndex" individuals.
227
228void TMVA::GeneticPopulation::Print( ostream & out, Int_t untilIndex )
229{
230 for ( unsigned int it = 0; it < fGenePool.size(); ++it ) {
231 Int_t n=0;
232 if (untilIndex >= -1 ) {
233 if (untilIndex == -1 ) return;
234 untilIndex--;
235 }
236 out << "fitness: " << fGenePool[it].GetFitness() << " ";
237 for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
238 vec < fGenePool[it].GetFactors().end(); ++vec ) {
239 out << "f_" << n++ << ": " << (*vec) << " ";
240 }
241 out << std::endl;
242 }
243}
244
245////////////////////////////////////////////////////////////////////////////////
246/// give back a histogram with the distribution of the coefficients.
247///
248/// Parameters:
249///
250/// - int bins : number of bins of the histogram
251/// - int min : histogram minimum
252/// - int max : maximum value of the histogram
253
255 Int_t min, Int_t max )
256{
257 std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
258
259 std::stringstream histName;
260 histName.clear();
261 histName.str("v");
262 histName << varNumber;
263 TH1F *hist = new TH1F( histName.str().c_str(),histName.str().c_str(), bins,min,max );
264
265 return hist;
266}
267
268////////////////////////////////////////////////////////////////////////////////
269/// gives back all the values of coefficient "varNumber" of the current generation
270
272{
273 std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
274
275 vector< Double_t > varDist;
276
277 return varDist;
278}
279
280////////////////////////////////////////////////////////////////////////////////
281/// add another population (strangers) to the one of this GeneticPopulation
282
284{
285 for (std::vector<TMVA::GeneticGenes>::iterator it = strangers->fGenePool.begin();
286 it != strangers->fGenePool.end(); ++it ) {
287 GiveHint( it->GetFactors(), it->GetFitness() );
288 }
289}
290
291////////////////////////////////////////////////////////////////////////////////
292/// add another population (strangers) to the one of this GeneticPopulation
293
295{
296 AddPopulation(&strangers);
297}
298
299////////////////////////////////////////////////////////////////////////////////
300/// trim the population to the predefined size
301
303{
304 std::sort(fGenePool.begin(), fGenePool.end());
305 while ( fGenePool.size() > (unsigned int) fPopulationSizeLimit )
306 fGenePool.pop_back();
307}
308
309////////////////////////////////////////////////////////////////////////////////
310/// add an individual (a set of variables) to the population
311/// if there is a set of variables which is known to perform good, they can be given as a hint to the population
312
313void TMVA::GeneticPopulation::GiveHint( std::vector< Double_t >& hint, Double_t fitness )
314{
315 TMVA::GeneticGenes g(hint);
316 g.SetFitness(fitness);
317
318 fGenePool.push_back( g );
319}
320
321////////////////////////////////////////////////////////////////////////////////
322/// sort the genepool according to the fitness of the individuals
323
325{
326 std::sort(fGenePool.begin(), fGenePool.end());
327}
328
#define g(i)
Definition: RSha256.hxx:105
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
Population definition for genetic algorithm.
void Mutate(Double_t probability=20, Int_t startIndex=0, Bool_t near=kFALSE, Double_t spread=0.1, Bool_t mirror=kFALSE)
Mutates the individuals in the genePool.
virtual ~GeneticPopulation()
destructor
std::vector< TMVA::GeneticRange * > fRanges
void Sort()
sort the genepool according to the fitness of the individuals
void MakeCopies(int number)
Produces offspring which is are copies of their parents.
void TrimPopulation()
trim the population to the predefined size
GeneticGenes * GetGenes(Int_t index)
gives back the "Genes" of the population with the given index.
GeneticPopulation(const std::vector< TMVA::Interval * > &ranges, Int_t size, UInt_t seed=0)
Constructor.
void Print(Int_t untilIndex=-1)
make a little printout of the individuals up to index "untilIndex" this means, .
void MakeChildren()
Creates children out of members of the current generation.
void GiveHint(std::vector< Double_t > &hint, Double_t fitness=0)
add an individual (a set of variables) to the population if there is a set of variables which is know...
std::vector< TMVA::GeneticGenes > fGenePool
void AddPopulation(GeneticPopulation *strangers)
add another population (strangers) to the one of this GeneticPopulation
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
GeneticGenes MakeSex(GeneticGenes male, GeneticGenes female)
this function takes two individuals and produces offspring by mixing (recombining) their coefficients...
TH1F * VariableDistribution(Int_t varNumber, Int_t bins, Int_t min, Int_t max)
give back a histogram with the distribution of the coefficients.
Range definition for genetic algorithm.
Definition: GeneticRange.h:42
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Random number generator class based on M.
Definition: TRandom3.h:27
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:207
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Definition: TRandom.cxx:635
const Int_t n
Definition: legend1.C:16
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750