Logo ROOT   6.18/05
Reference Guide
df007_snapshot.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## This tutorial shows how to write out datasets in ROOT format using the RDataFrame
5## \macro_code
6##
7## \date April 2017
8## \author Danilo Piparo
9
10import ROOT
11
# A simple helper function to fill a test tree: this makes the example stand-alone.
def fill_tree(treeName, fileName):
    """Write a small test tree named `treeName` into the ROOT file `fileName`.

    The dataset has 10000 entries and two columns, both derived from the
    entry number: `b1` (the entry number cast to int) and `b2` (the entry
    number squared, computed as float).
    """
    # `rdfentry_` is the RDataFrame name of the entry-number column;
    # `tdfentry_` is the legacy spelling from the TDataFrame era.
    df = ROOT.ROOT.RDataFrame(10000)
    df.Define("b1", "(int) rdfentry_")\
      .Define("b2", "(float) rdfentry_ * rdfentry_").Snapshot(treeName, fileName)
17
# We prepare an input tree to run on
fileName = "df007_snapshot_py.root"
outFileName = "df007_snapshot_output_py.root"
outFileNameAllColumns = "df007_snapshot_output_allColumns_py.root"
treeName = "myTree"
fill_tree(treeName, fileName)

# We read the tree from the file and create a RDataFrame.
RDF = ROOT.ROOT.RDataFrame
d = RDF(treeName, fileName)

# ## Select entries
# We now select some entries in the dataset: only even values of b1 are kept.
d_cut = d.Filter("b1 % 2 == 0")

# ## Enrich the dataset
# Build some temporary columns: we'll write them out

# C++ helper declared to the interpreter so that it can be called from the
# string expression of the Define below. It returns {0, b2, 2*b2}.
getVector_code ='''
std::vector<float> getVector (float b2)
{
 std::vector<float> v;
 for (int i = 0; i < 3; i++) v.push_back(b2*i);
 return v;
}
'''
ROOT.gInterpreter.Declare(getVector_code)

d2 = d_cut.Define("b1_square", "b1 * b1") \
          .Define("b2_vector", "getVector( b2 )")

# ## Write it to disk in ROOT format
# We now write to disk a new dataset with one of the variables originally
# present in the tree and the new variables.
# The user can explicitly specify the types of the columns as template
# arguments of the Snapshot method, otherwise they will be automatically
# inferred.
branchList = ROOT.vector('string')()
for branchName in ["b1", "b1_square", "b2_vector"]:
    branchList.push_back(branchName)
d2.Snapshot(treeName, outFileName, branchList)

# Open the new file and list the columns of the tree.
# The tree is fetched by name through `treeName` rather than through a
# hard-coded attribute, so the script keeps working if the name is changed.
f1 = ROOT.TFile(outFileName)
t = f1.Get(treeName)
print("These are the columns b1, b1_square and b2_vector:")
for branch in t.GetListOfBranches():
    print("Branch: %s" % branch.GetName())

f1.Close()

# We are not forced to write the full set of column names. We can also
# specify a regular expression for that. In case nothing is specified, all
# columns are persistified.
d2.Snapshot(treeName, outFileNameAllColumns)

# Open the new file and list the columns of the tree
f2 = ROOT.TFile(outFileNameAllColumns)
t = f2.Get(treeName)
print("These are all the columns available to this tdf:")
for branch in t.GetListOfBranches():
    print("Branch: %s" % branch.GetName())

f2.Close()

# We can also get a fresh RDataFrame out of the snapshot and restart the
# analysis chain from it.
# Note that this writes to outFileName again, replacing the three-column
# dataset written earlier with one holding only b1_square.

branchList.clear()
branchList.push_back("b1_square")
snapshot_tdf = d2.Snapshot(treeName, outFileName, branchList)
h = snapshot_tdf.Histo1D("b1_square")
c = ROOT.TCanvas()
h.Draw()
91