Logo ROOT   6.14/05
Reference Guide
df007_snapshot.py
Go to the documentation of this file.
1 ## \file
2 ## \ingroup tutorial_dataframe
3 ## \notebook
4 ## This tutorial shows how to write out datasets in ROOT format using the RDataFrame
5 ## \macro_code
6 ##
7 ## \date April 2017
8 ## \author Danilo Piparo
9 
10 import ROOT
11 
12 # A simple helper function to fill a test tree: this makes the example stand-alone.
def fill_tree(treeName, fileName):
    """Write a small test dataset to disk so the tutorial is stand-alone.

    An RDataFrame is constructed with 10000, two columns are defined from
    the ``tdfentry_`` column ("b1" as ``(int) tdfentry_`` and "b2" as
    ``(float) tdfentry_ * tdfentry_``), and the result is snapshotted.

    Args:
        treeName: Tree name handed to ``Snapshot``.
        fileName: File name handed to ``Snapshot``.
    """
    source = ROOT.ROOT.RDataFrame(10000)
    with_b1 = source.Define("b1", "(int) tdfentry_")
    with_b2 = with_b1.Define("b2", "(float) tdfentry_ * tdfentry_")
    with_b2.Snapshot(treeName, fileName)
17 
# Create the input file first, so that the rest of the tutorial has a tree
# to run on.
treeName = "myTree"
fileName = "df007_snapshot_py.root"
outFileName = "df007_snapshot_output_py.root"
outFileNameAllColumns = "df007_snapshot_output_allColumns_py.root"
fill_tree(treeName, fileName)

# Build an RDataFrame on top of the tree stored in that file.
RDF = ROOT.ROOT.RDataFrame
d = RDF(treeName, fileName)

# ## Select entries
# Keep only the entries of the dataset whose b1 value is even.
d_cut = d.Filter("b1 % 2 == 0")
# ## Enrich the dataset
# Define some temporary columns: they are written out further below.

# C++ helper declared to ROOT's interpreter; the b2_vector column calls it.
getVector_code = '''
std::vector<float> getVector (float b2)
{
 std::vector<float> v;
 for (int i = 0; i < 3; i++) v.push_back(b2*i);
 return v;
}
'''
ROOT.gInterpreter.Declare(getVector_code)

d2 = (
    d_cut
    .Define("b1_square", "b1 * b1")
    .Define("b2_vector", "getVector( b2 )")
)
47 
# ## Write it to disk in ROOT format
# Write a new dataset to disk, holding one of the variables that was already
# present in the tree (b1) together with the newly defined variables.
# The column types can be given explicitly as template arguments of the
# Snapshot method; when they are not, they are inferred automatically.
branchList = ROOT.vector('string')()
for columnName in ("b1", "b1_square", "b2_vector"):
    branchList.push_back(columnName)
d2.Snapshot(treeName, outFileName, branchList)
58 
# Open the new file and list the columns of the tree.
f1 = ROOT.TFile(outFileName)
# Look the tree up through treeName rather than a hard-coded attribute, so
# the lookup keeps working if the tree name defined above is changed.
t = f1.Get(treeName)
print("These are the columns b1, b1_square and b2_vector:")
for branch in t.GetListOfBranches():
    print("Branch: %s" % branch.GetName())

f1.Close()
67 
# Listing the column names explicitly is optional: a regular expression can
# be specified instead, and when nothing is specified all columns are
# written out.
d2.Snapshot(treeName, outFileNameAllColumns)

# Open the new file and list the columns of the tree.
f2 = ROOT.TFile(outFileNameAllColumns)
# Look the tree up through treeName rather than a hard-coded attribute, so
# the lookup keeps working if the tree name defined above is changed.
t = f2.Get(treeName)
print("These are all the columns available to this tdf:")
for branch in t.GetListOfBranches():
    print("Branch: %s" % branch.GetName())

f2.Close()
81 
# A snapshot also gives back a fresh RDataFrame, so the analysis chain can be
# restarted from the data that was just written.

# Reuse the column list, this time keeping only b1_square.
branchList.clear()
branchList.push_back("b1_square")
snapshot_tdf = d2.Snapshot(treeName, outFileName, branchList)
h = snapshot_tdf.Histo1D("b1_square")
c = ROOT.TCanvas()
h.Draw()
91