Logo ROOT  
Reference Guide
df007_snapshot.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// Write ROOT data with RDataFrame.
5///
6/// This tutorial shows how to write out datasets in ROOT format using the RDataFrame
7///
8/// \macro_image
9/// \macro_code
10///
11/// \date April 2017
12/// \author Danilo Piparo (CERN)
13
14// A simple helper function to fill a test tree: this makes the example
15// stand-alone.
16void fill_tree(const char *treeName, const char *fileName)
17{
18 ROOT::RDataFrame d(10000);
19 int i(0);
20 d.Define("b1", [&i]() { return i; })
21 .Define("b2",
22 [&i]() {
23 float j = i * i;
24 ++i;
25 return j;
26 })
27 .Snapshot(treeName, fileName);
28}
29
31{
32 // We prepare an input tree to run on
33 auto fileName = "df007_snapshot.root";
34 auto outFileName = "df007_snapshot_output.root";
35 auto outFileNameAllColumns = "df007_snapshot_output_allColumns.root";
36 auto treeName = "myTree";
37 fill_tree(treeName, fileName);
38
39 // We read the tree from the file and create a RDataFrame.
40 ROOT::RDataFrame d(treeName, fileName);
41
42 // ## Select entries
43 // We now select some entries in the dataset
44 auto d_cut = d.Filter("b1 % 2 == 0");
45 // ## Enrich the dataset
46 // Build some temporary columns: we'll write them out
47 auto d2 = d_cut.Define("b1_square", "b1 * b1")
48 .Define("b2_vector",
49 [](float b2) {
50 std::vector<float> v;
51 for (int i = 0; i < 3; i++)
52 v.push_back(b2 * i);
53 return v;
54 },
55 {"b2"});
56
57 // ## Write it to disk in ROOT format
58 // We now write to disk a new dataset with one of the variables originally
59 // present in the tree and the new variables.
60 // The user can explicitly specify the types of the columns as template
61 // arguments of the Snapshot method, otherwise they will be automatically
62 // inferred.
63 d2.Snapshot(treeName, outFileName, {"b1", "b1_square", "b2_vector"});
64
65 // Open the new file and list the columns of the tree
66 TFile f1(outFileName);
67 auto t = f1.Get<TTree>(treeName);
68 std::cout << "These are the columns b1, b1_square and b2_vector:" << std::endl;
69 for (auto branch : *t->GetListOfBranches()) {
70 std::cout << "Branch: " << branch->GetName() << std::endl;
71 }
72 f1.Close();
73
74 // We are not forced to write the full set of column names. We can also
75 // specify a regular expression for that. In case nothing is specified, all
76 // columns are persistified.
77 d2.Snapshot(treeName, outFileNameAllColumns);
78
79 // Open the new file and list the columns of the tree
80 TFile f2(outFileNameAllColumns);
81 t = f2.Get<TTree>(treeName);
82 std::cout << "These are all the columns available to this dataframe:" << std::endl;
83 for (auto branch : *t->GetListOfBranches()) {
84 std::cout << "Branch: " << branch->GetName() << std::endl;
85 }
86 f2.Close();
87
88 // We can also get a fresh RDataFrame out of the snapshot and restart the
89 // analysis chain from it. The default columns are the one selected.
90 // Notice also how we can decide to be more explicit with the types of the
91 // columns.
92 auto snapshot_df = d2.Snapshot<int>(treeName, outFileName, {"b1_square"});
93 auto h = snapshot_df->Histo1D();
94 auto c = new TCanvas();
95 h->DrawClone();
96
97 return 0;
98}
#define d(i)
Definition: RSha256.hxx:102
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:42
The Canvas class.
Definition: TCanvas.h:23
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
A TTree represents a columnar dataset.
Definition: TTree.h:79
TF1 * f1
Definition: legend1.C:11