34void TMVA_Higgs_Classification() {
38 bool useLikelihood =
true;
39 bool useLikelihoodKDE =
false;
40 bool useFischer =
true;
49 auto outputFile =
TFile::Open(
"Higgs_ClassificationOutput.root",
"RECREATE");
51 TMVA::Factory factory(
"TMVA_Higgs_Classification", outputFile,
52 "!V:ROC:!Silent:Color:AnalysisType=Classification" );
62 TString inputFileName =
"Higgs_data.root";
63 TString inputFileLink =
"http://root.cern.ch/files/" + inputFileName;
65 TFile *inputFile =
nullptr;
74 Info(
"TMVA_Higgs_Classification",
"Download Higgs_data.root file");
75 inputFile =
TFile::Open(inputFileLink,
"CACHEREAD");
79 Error(
"TMVA_Higgs_Classification",
"Input file cannot be downloaded - exit");
139 "nTrain_Signal=7000:nTrain_Background=7000:SplitMode=Random:NormMode=NumEvents:!V" );
153 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
156if (useLikelihoodKDE) {
158 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
164 factory.BookMethod(loader,
TMVA::Types::kFisher,
"Fisher",
"H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
170 "!V:NTrees=200:MinNodeSize=2.5%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
176 "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
246 bool useDLGPU =
false;
252 TString inputLayoutString =
"InputLayout=1|1|7";
253 TString batchLayoutString=
"BatchLayout=1|128|7";
254 TString layoutString (
"Layout=DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR");
257 TString training1(
"LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
258 "ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,"
259 "MaxEpochs=30,WeightDecay=1e-4,Regularization=None,"
260 "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0.");
266 TString trainingStrategyString (
"TrainingStrategy=");
267 trainingStrategyString += training1;
271 TString dnnOptions (
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:"
272 "WeightInitialization=XAVIER");
273 dnnOptions.Append (
":"); dnnOptions.Append (inputLayoutString);
274 dnnOptions.Append (
":"); dnnOptions.Append (batchLayoutString);
275 dnnOptions.Append (
":"); dnnOptions.Append (layoutString);
276 dnnOptions.Append (
":"); dnnOptions.Append (trainingStrategyString);
278 TString dnnMethodName =
"DNN_CPU";
280 dnnOptions +=
":Architecture=GPU";
281 dnnMethodName =
"DNN_GPU";
283 dnnOptions +=
":Architecture=CPU";
297 factory.TrainAllMethods();
305 factory.TestAllMethods();
307 factory.EvaluateAllMethods();
311 auto c1 = factory.GetROCCurve(loader);
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
R__EXTERN TSystem * gSystem
A specialized string object used for TTree selections.
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Add a signal tree to the data set, with an optional per-tree weight and tree type.
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Add a background tree to the data set, with an optional per-tree weight and tree type.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
This is the main MVA steering class.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
A TTree represents a columnar dataset.
virtual void Print(Option_t *option="") const
Print a summary of the tree contents.