ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
parse_CSV_file_with_TTree_ReadStream.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import ROOT
4 import sys
5 import os
6 
def parse_CSV_file_with_TTree_ReadStream(tree_name, afile):
    """
    parse_CSV_file_with_TTree_ReadStream
    Michael Marino: mmarino@gmail.com

    This function provides an example of how one might
    massage a csv data file to read into a ROOT TTree
    via TTree::ReadStream. This could be useful if the
    data read out from some DAQ program doesn't 'quite'
    match the formatting expected by ROOT (e.g. comma-
    separated, tab-separated with white-space strings,
    headers not matching the expected format, etc.)

    This example is shipped with a data file that looks like
    (fields are tab-separated in the real file):

    Date/Time        Synchro  Capacity      Temp.Cold Head  Temp. Electrode  HV Supply Voltage  Electrode 1  Electrode 2  Electrode 3  Electrode 4
    # Example data to read out. Some data have oddities that might need to
    # dealt with, including the 'NaN' in Electrode 4 and the empty string in Date/Time (last row)
    08112010.160622  7        5.719000E-10  8.790500        24.237700        -0.008332          0            0            0            0
    8112010.160626   7        5.710000E-10  8.828400        24.237500        -0.008818          0            0            0            0
    08112010.160626  7        5.719000E-10  8.828400        24.237500        -0.008818          0            0            0            0
    08112010.160627  7        5.719000E-10  9.014300        24.237400        -0.028564          0            0            0            NaN
    08112010.160627  7        5.711000E-10  8.786000        24.237400        -0.008818          0            0            0            0
    08112010.160628  7        5.702000E-10  8.786000        24.237400        -0.009141          0            0            0            0
    08112010.160633  7        5.710000E-10  9.016200        24.237200        -0.008818          0            0            0            0
                     7        5.710000E-10  8.903400        24.237200        -0.008818          0            0            0            0

    These data require some massaging, including:

    - Date/Time has a blank ('') entry that must be handled
    - The headers are not in the correct format
    - Tab-separated entries with additional white space
    - NaN entries

    Args:
        tree_name: Name (also used as the title) of the TTree to create.
        afile: Path of the tab-separated input file. The output ROOT file
            is written next to it, with the extension replaced by '.root'.

    Raises:
        KeyError: If a header field of the input file is not a key of the
            header mapping dictionary defined below.
    """

    # Put ROOT in batch mode.
    ROOT.gROOT.SetBatch()

    # The mapping dictionary defines the proper branch names and types
    # given a header name.
    header_mapping_dictionary = {
        'Date/Time'         : ('Datetime'       , str) ,
        'Synchro'           : ('Synchro'        , int) ,
        'Capacity'          : ('Capacitance'    , float) ,
        'Temp.Cold Head'    : ('TempColdHead'   , float) ,
        'Temp. Electrode'   : ('TempElectrode'  , float) ,
        'HV Supply Voltage' : ('HVSupplyVoltage', float) ,
        'Electrode 1'       : ('Electrode1'     , int) ,
        'Electrode 2'       : ('Electrode2'     , int) ,
        'Electrode 3'       : ('Electrode3'     , int) ,
        'Electrode 4'       : ('Electrode4'     , int) ,
    }

    # Python type -> ROOT branch type code used in the branch descriptor.
    type_mapping_dictionary = {
        str   : 'C',
        int   : 'I',
        float : 'F'
    }

    # Read the whole file once and close it deterministically; both the
    # header row and the data rows are taken from this single read.
    with open(afile) as input_file:
        file_lines = input_file.readlines()

    # Grab the header row of the file. In this particular example,
    # the data are separated using tabs, but some of the header names
    # include spaces and are not generally in the ROOT expected format, e.g.
    #
    # FloatData/F:StringData/C:IntData/I
    #
    # etc. Therefore, we grab the header_row of the file, and use
    # a python dictionary to set up the appropriate branch descriptor
    # line.

    # Take the first line, strip the new lines and split it into a list
    # along 'tab' boundaries. An empty file yields a single '' field, which
    # fails the dictionary lookup below with a KeyError.
    first_line = file_lines[0] if file_lines else ''
    header_row = first_line.strip().split('\t')

    # Create the branch descriptor
    branch_descriptor = ':'.join(
        [header_mapping_dictionary[row][0] + '/' +
         type_mapping_dictionary[header_mapping_dictionary[row][1]]
         for row in header_row])

    # Handling the input and output names. Using the same
    # base name for the ROOT output file.
    output_ROOT_file_name = os.path.splitext(afile)[0] + '.root'
    output_file = ROOT.TFile(output_ROOT_file_name, 'recreate')
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print("Outputting %s -> %s" % (afile, output_ROOT_file_name))

    output_tree = ROOT.TTree(tree_name, tree_name)

    # Clean the data entries: remove the first (header) row.
    # Ensure empty strings are tagged as such since
    # ROOT doesn't differentiate between different types
    # of white space. Therefore, we change all of these
    # entries to 'empty'. Also, avoiding any lines that begin
    # with '#'.
    # NOTE: lines keep their trailing newline here, exactly as read.
    file_lines = ['\t'.join([val if (' ' not in val and val != '')
                             else 'empty' for val in line.split('\t')])
                  for line in file_lines[1:] if not line.startswith('#')]

    # Removing NaN, setting these entries to 0.0.
    # Also joining the list of strings into one large string.
    file_as_string = ('\n'.join(file_lines)).replace('NaN', str(0.0))

    # creating an istringstream to pass into ReadStream
    istring = ROOT.istringstream(file_as_string)

    # Now read the stream
    output_tree.ReadStream(istring, branch_descriptor)

    output_file.cd()
    output_tree.Write()
    # Close the output file explicitly instead of relying on interpreter
    # teardown to finalise it.
    output_file.Close()
117 
118 
if __name__ == '__main__':
    # The path of the data file to convert is the first command-line
    # argument; without it, print the usage line and exit with status 1.
    if len(sys.argv) < 2:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Usage: %s file_to_parse.dat" % sys.argv[0])
        sys.exit(1)
    parse_CSV_file_with_TTree_ReadStream("example_tree", sys.argv[1])
124 
ClassImp(TAlienJobStatusList) void TAlienJobStatusList TString split(jobstatus->GetKey("split"))
Print information about jobs.