Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
global_temperatures.C
Go to the documentation of this file.
1// Tutorial that highlights ROOT 7 features such as RNTuple and visualizations, as well as ROOT 6 features like RDataFrame.
2
3// The tutorial first uses RNTuple to ingest climate data and create a model with fields like
4// AverageTemperature. Then it uses RDataFrame to process and filter the climate data for
5// average temperature per city by season. Then it does the same for average temperature
6// per city for the decades 1993-2002 and 2003-2013. Finally, the tutorial
7// visualizes this processed data through histograms.
8
9// During ROOT setup, configure with the following flags: "-DCMAKE_CXX_STANDARD=14 -Droot7=ON -Dwebgui=ON"
10
11// NOTE: The RNTuple classes are experimental at this point.
12// Functionality, interface, and data format is still subject to changes.
13// Do not use for real data!
14
15// Until C++ runtime modules are universally used, we explicitly load the ntuple library. Otherwise
16// triggering autoloading from the use of templated types would require an exhaustive enumeration
17// of "all" template instances in the LinkDef file.
18R__LOAD_LIBRARY(ROOTNTuple)
19#include <ROOT/RDataFrame.hxx>
20#include <ROOT/RNTuple.hxx>
21#include <ROOT/RNTupleDS.hxx>
23#include <ROOT/RNTupleModel.hxx>
24#include <ROOT/RCanvas.hxx>
25#include <ROOT/RColor.hxx>
28#include <ROOT/RRawFile.hxx>
29#include <TH1D.h>
30#include <TLegend.h>
31#include <TSystem.h>
32
33#include <algorithm>
34#include <cassert>
35#include <cstdio>
36#include <fstream>
37#include <iostream>
38#include <memory>
39#include <string>
40#include <sstream>
41#include <stdexcept>
42#include <utility>
43#include <chrono>
44
45using Clock = std::chrono::high_resolution_clock;
47using namespace ROOT::Experimental;
48
49// Helper function to handle histogram pointer ownership.
51 auto result = std::shared_ptr<TH1D>(static_cast<TH1D *>(h.GetPtr()->Clone()));
52 result->SetDirectory(nullptr);
53 return result;
54}
55
56// Climate data is downloadable at the following URL:
57// https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data
58// The original data set is from http://berkeleyearth.org/archive/data/
59// License CC BY-NC-SA 4.0
60constexpr const char *kRawDataUrl = "http://root.cern./files/tutorials/GlobalLandTemperaturesByCity.csv";
61constexpr const char *kNTupleFileName = "GlobalLandTemperaturesByCity.root";
62
63void Ingest() {
64 int nRecords = 0;
65 int nSkipped = 0;
66 std::cout << "Converting " << kRawDataUrl << " to " << kNTupleFileName << std::endl;
67
68 auto t1 = Clock::now();
69
70 // Create a unique pointer to an empty data model.
71 auto model = RNTupleModel::Create();
72 // To define the data model, create fields with a given C++ type and name. Fields are roughly TTree branches.
73 // MakeField returns a shared pointer to a memory location to fill the ntuple with data.
74 auto fieldYear = model->MakeField<std::uint32_t>("Year");
75 auto fieldMonth = model->MakeField<std::uint32_t>("Month");
76 auto fieldDay = model->MakeField<std::uint32_t>("Day");
77 auto fieldAvgTemp = model->MakeField<float>("AverageTemperature");
78 auto fieldTempUncrty = model->MakeField<float>("AverageTemperatureUncertainty");
79 auto fieldCity = model->MakeField<std::string>("City");
80 auto fieldCountry = model->MakeField<std::string>("Country");
81 auto fieldLat = model->MakeField<float>("Latitude");
82 auto fieldLong = model->MakeField<float>("Longitude");
83
84 // Hand-over the data model to a newly created ntuple of name "globalTempData", stored in kNTupleFileName.
85 // In return, get a unique pointer to a fillable ntuple (first compress the file).
86 RNTupleWriteOptions options;
88 auto ntuple = RNTupleWriter::Recreate(std::move(model), "GlobalTempData", kNTupleFileName, options);
89
91 std::string record;
92 constexpr int kMaxCharsPerLine = 128;
93 while (file->Readln(record)) {
94 if (record.length() >= kMaxCharsPerLine)
95 throw std::runtime_error("record too long: " + record);
96
97 // Parse lines of the form:
98 // 1743-11-01,6.068,1.7369999999999999,Ã…rhus,Denmark,57.05N,10.33E
99 // and skip records with empty fields.
100 std::replace(record.begin(), record.end(), ',', ' ');
101 char country[kMaxCharsPerLine];
102 char city[kMaxCharsPerLine];
103 int nFields = sscanf(record.c_str(), "%u-%u-%u %f %f %s %s %fN %fE",
104 fieldYear.get(), fieldMonth.get(), fieldDay.get(),
105 fieldAvgTemp.get(), fieldTempUncrty.get(), country, city,
106 fieldLat.get(), fieldLong.get());
107 if (nFields != 9) {
108 nSkipped++;
109 continue;
110 }
111 *fieldCountry = country;
112 *fieldCity = city;
113
114 ntuple->Fill();
115
116 if (++nRecords % 1000000 == 0)
117 std::cout << " ... converted " << nRecords << " records" << std::endl;
118 }
119
120 // Display the total time to process the data.
121 std::cout << nSkipped << " records skipped" << std::endl;
122 std::cout << nRecords << " records processed" << std::endl;
123
124 auto t2 = Clock::now();
125 std::cout << std::endl
126 << "Processing Time: "
127 << std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count()
128 << " seconds\n" << std::endl;
129}
130
131// Every data result that we want to get is declared first, and it is only upon their declaration that
132// they are actually used. This stems from motivations relating to efficiency and optimization.
133void Analyze() {
134 // Create a RDataframe by wrapping around NTuple.
135 auto df = ROOT::Experimental::MakeNTupleDataFrame("GlobalTempData", kNTupleFileName);
136 df.Display()->Print();
137
138 // Declare the minimum and maximum temperature from the dataset.
139 auto min_value = df.Min("AverageTemperature");
140 auto max_value = df.Max("AverageTemperature");
141
142 // Functions to filter by each season from date formatted "1944-12-01."
143 auto fnWinter = [](int month) { return month == 12 || month == 1 || month == 2; };
144 auto fnSpring = [](int month) { return month == 3 || month == 4 || month == 5; };
145 auto fnSummer = [](int month) { return month == 6 || month == 7 || month == 8; };
146 auto fnFall = [](int month) { return month == 9 || month == 10 || month == 11; };
147
148 // Create a RDataFrame per season.
149 auto dfWinter = df.Filter(fnWinter, {"Month"});
150 auto dfSpring = df.Filter(fnSpring, {"Month"});
151 auto dfSummer = df.Filter(fnSummer, {"Month"});
152 auto dfFall = df.Filter(fnFall, {"Month"});
153
154 // Get the count for each season.
155 auto winterCount = dfWinter.Count();
156 auto springCount = dfSpring.Count();
157 auto summerCount = dfSummer.Count();
158 auto fallCount = dfFall.Count();
159
160 // Functions to filter for the time period between 2003-2013, and 1993-2002.
161 auto fn1993_to_2002 = [](int year) { return year >= 1993 && year <= 2002; };
162 auto fn2003_to_2013 = [](int year) { return year >= 2003 && year <= 2013; };
163
164 // Create a RDataFrame for decades 1993_to_2002 & 2003_to_2013.
165 auto df1993_to_2002 = df.Filter(fn1993_to_2002, {"Year"});
166 auto df2003_to_2013 = df.Filter(fn2003_to_2013, {"Year"});
167
168 // Get the count for each decade.
169 auto decade_1993_to_2002_Count = *df1993_to_2002.Count();
170 auto decade_2003_to_2013_Count = *df2003_to_2013.Count();
171
172 // Configure histograms for each season.
173 auto fallHistResultPtr = dfFall.Histo1D({"Fall Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
174 auto winterHistResultPtr = dfWinter.Histo1D({"Winter Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
175 auto springHistResultPtr = dfSpring.Histo1D({"Spring Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
176 auto summerHistResultPtr = dfSummer.Histo1D({"Summer Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
177
178 // Configure histograms for each decade.
179 auto hist_1993_to_2002_ResultPtr = df1993_to_2002.Histo1D({"1993_to_2002 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40}, "AverageTemperature");
180 auto hist_2003_to_2013_ResultPtr = df2003_to_2013.Histo1D({"2003_to_2013 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40}, "AverageTemperature");
181
182 //____________________________________________________________________________________
183
184 // Display the minimum and maximum temperature values.
185 std::cout << std::endl << "The Minimum temperature is: " << *min_value << std::endl;
186 std::cout << "The Maximum temperature is: " << *max_value << std::endl;
187
188 // Display the count for each season.
189 std::cout << std::endl << "The count for Winter: " << *winterCount<< std::endl;
190 std::cout << "The count for Spring: " << *springCount << std::endl;
191 std::cout << "The count for Summer: " << *summerCount << std::endl;
192 std::cout << "The count for Fall: " << *fallCount << std::endl;
193
194 // Display the count for each decade.
195 std::cout << std::endl << "The count for 1993_to_2002: " << decade_1993_to_2002_Count << std::endl;
196 std::cout << "The count for 2003_to_2013: " <<decade_2003_to_2013_Count << std::endl;
197
198 // Transform histogram in order to address ROOT 7 v 6 version compatibility
199 auto fallHist = GetDrawableHist(fallHistResultPtr);
200 auto winterHist = GetDrawableHist(winterHistResultPtr);
201 auto springHist = GetDrawableHist(springHistResultPtr);
202 auto summerHist = GetDrawableHist(summerHistResultPtr);
203
204 // Set an orange histogram for fall.
205 fallHist->SetLineColor(kOrange);
206 fallHist->SetLineWidth(6);
207 // Set a blue histogram for winter.
208 winterHist->SetLineColor(kBlue);
209 winterHist->SetLineWidth(6);
210 // Set a green histogram for spring.
211 springHist->SetLineColor(kGreen);
212 springHist->SetLineWidth(6);
213 // Set a red histogram for summer.
214 summerHist->SetLineColor(kRed);
215 summerHist->SetLineWidth(6);
216
217 // Transform histogram in order to address ROOT 7 v 6 version compatibility
218 auto hist_1993_to_2002 = GetDrawableHist(hist_1993_to_2002_ResultPtr);
219 auto hist_2003_to_2013 = GetDrawableHist(hist_2003_to_2013_ResultPtr);
220
221 // Set a violet histogram for 1993_to_2002.
222 hist_1993_to_2002->SetLineColor(kViolet);
223 hist_1993_to_2002->SetLineWidth(6);
224 // Set a spring-green histogram for 2003_to_2013.
225 hist_2003_to_2013->SetLineColor(kSpring);
226 hist_2003_to_2013->SetLineWidth(6);
227
228
229 // Create a canvas to display histograms for average temperature by season.
230 auto canvas = RCanvas::Create("Average Temperature by Season");
231 canvas->Draw<TObjectDrawable>(fallHist, "L");
232 canvas->Draw<TObjectDrawable>(winterHist, "L");
233 canvas->Draw<TObjectDrawable>(springHist, "L");
234 canvas->Draw<TObjectDrawable>(summerHist, "L");
235
236 // Create a legend for the seasons canvas.
237 auto legend = std::make_shared<TLegend>(0.15,0.65,0.53,0.85);
238 legend->AddEntry(fallHist.get(),"fall","l");
239 legend->AddEntry(winterHist.get(),"winter","l");
240 legend->AddEntry(springHist.get(),"spring","l");
241 legend->AddEntry(summerHist.get(),"summer","l");
242 canvas->Draw<TObjectDrawable>(legend, "L");
243 canvas->Show();
244
245 // Create a canvas to display histograms for average temperature for 1993_to_2002 & 2003_to_2013.
246 auto canvas2 = RCanvas::Create("Average Temperature: 1993_to_2002 vs. 2003_to_2013");
247 canvas2->Draw<TObjectDrawable>(hist_1993_to_2002, "L");
248 canvas2->Draw<TObjectDrawable>(hist_2003_to_2013, "L");
249
250 // Create a legend for the two decades canvas.
251 auto legend2 = std::make_shared<TLegend>(0.1,0.7,0.48,0.9);
252 legend2->AddEntry(hist_1993_to_2002.get(),"1993_to_2002","l");
253 legend2->AddEntry(hist_2003_to_2013.get(),"2003_to_2013","l");
254 canvas2->Draw<TObjectDrawable>(legend2, "L");
255 canvas2->Show();
256}
257
259 //if NOT zero (the file does NOT already exist), then Ingest
261 Ingest();
262
263 }
264 Analyze();
265}
#define h(i)
Definition RSha256.hxx:106
#define R__LOAD_LIBRARY(LIBRARY)
Definition Rtypes.h:472
@ kRed
Definition Rtypes.h:66
@ kOrange
Definition Rtypes.h:67
@ kGreen
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
@ kViolet
Definition Rtypes.h:67
@ kSpring
Definition Rtypes.h:67
R__EXTERN TSystem * gSystem
Definition TSystem.h:559
static std::shared_ptr< RCanvas > Create(const std::string &title)
Create new canvas instance.
Definition RCanvas.cxx:103
static std::unique_ptr< RNTupleModel > Create()
Common user-tunable settings for storing ntuples.
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Definition RNTuple.cxx:265
Provides v7 drawing facilities for TObject types (TGraph etc).
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition RRawFile.cxx:73
Smart pointer for the return type of actions.
1-D histogram with a double per channel (see TH1 documentation)}
Definition TH1.h:618
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1294
void Analyze()
void global_temperatures()
void Ingest()
constexpr const char * kRawDataUrl
std::shared_ptr< TH1D > GetDrawableHist(ROOT::RDF::RResultPtr< TH1D > &h)
std::chrono::high_resolution_clock Clock
constexpr const char * kNTupleFileName
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
Definition file.py:1
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:54
auto * t1
Definition textangle.C:20