Logo ROOT  
Reference Guide
RNTuple.cxx
Go to the documentation of this file.
1/// \file RNTuple.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RNTuple.hxx>
17
19#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RPageSinkBuf.hxx>
24#ifdef R__USE_IMT
25#include <ROOT/TTaskGroup.hxx>
26#endif
27
28#include <TError.h>
29#include <TROOT.h> // for IsImplicitMTEnabled()
30
31#include <algorithm>
32#include <exception>
33#include <functional>
34#include <iomanip>
35#include <iostream>
36#include <sstream>
37#include <string>
38#include <unordered_map>
39#include <utility>
40
41
42#ifdef R__USE_IMT
44{
45 Reset();
46}
47
49{
50 fTaskGroup = std::make_unique<TTaskGroup>();
51}
52
53
55{
56 fTaskGroup->Run(taskFunc);
57}
58
59
61{
62 fTaskGroup->Wait();
63}
64#endif
65
66
67//------------------------------------------------------------------------------
68
69
71 const auto &desc = fSource->GetDescriptor();
72 model.GetFieldZero()->SetOnDiskId(desc.GetFieldZeroId());
73 for (auto &field : *model.GetFieldZero()) {
74 // If the model has been created from the descriptor, the on-disk IDs are already set.
75 // User-provided models instead need to find their corresponding IDs in the descriptor.
76 if (field.GetOnDiskId() == kInvalidDescriptorId) {
77 field.SetOnDiskId(desc.FindFieldId(field.GetName(), field.GetParent()->GetOnDiskId()));
78 }
79 field.ConnectPageSource(*fSource);
80 }
81}
82
84{
85#ifdef R__USE_IMT
86 if (IsImplicitMTEnabled()) {
87 fUnzipTasks = std::make_unique<RNTupleImtTaskScheduler>();
88 fSource->SetTaskScheduler(fUnzipTasks.get());
89 }
90#endif
91 fSource->Attach();
92 fMetrics.ObserveMetrics(fSource->GetMetrics());
93}
94
96 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
97 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
98 : fSource(std::move(source))
99 , fModel(std::move(model))
100 , fMetrics("RNTupleReader")
101{
102 if (!fSource) {
103 throw RException(R__FAIL("null source"));
104 }
105 if (!fModel) {
106 throw RException(R__FAIL("null model"));
107 }
108 fModel->Freeze();
111}
112
/// Constructs a reader from a page source only, without a user-provided model.
/// Takes ownership of `source`; fModel is initialized to nullptr and the metrics
/// object is named "RNTupleReader".
/// \throws RException ("null source") if `source` is null.
// NOTE(review): the embedded listing numbers jump from 120 to 122, so one statement
// of the original body appears to be missing from this view -- confirm against the
// real source file before relying on this block.
113ROOT::Experimental::RNTupleReader::RNTupleReader(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
114 : fSource(std::move(source))
115 , fModel(nullptr)
116 , fMetrics("RNTupleReader")
117{
 // `source` has already been moved into fSource, so the member is what gets checked.
118 if (!fSource) {
119 throw RException(R__FAIL("null source"));
120 }
122}
123
125
/// Factory: opens an ntuple for reading with a user-imposed model.
/// The page source is obtained from Detail::RPageSource::Create(ntupleName, storage, options);
/// ownership of both the model and the new page source is handed to the RNTupleReader.
/// \param model      Model imposed by the caller (moved into the reader).
/// \param ntupleName Name of the ntuple, forwarded to RPageSource::Create.
/// \param storage    Storage location, forwarded to RPageSource::Create.
/// \param options    Read options, forwarded to RPageSource::Create.
/// \return The newly created reader.
126std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
127 std::unique_ptr<RNTupleModel> model,
128 std::string_view ntupleName,
129 std::string_view storage,
130 const RNTupleReadOptions &options)
131{
132 return std::make_unique<RNTupleReader>(std::move(model), Detail::RPageSource::Create(ntupleName, storage, options));
133}
134
/// Factory: opens an ntuple for reading without a user-provided model.
/// Only the page source from Detail::RPageSource::Create(ntupleName, storage, options)
/// is passed to the RNTupleReader constructor.
/// \param ntupleName Name of the ntuple, forwarded to RPageSource::Create.
/// \param storage    Storage location, forwarded to RPageSource::Create.
/// \param options    Read options, forwarded to RPageSource::Create.
/// \return The newly created reader.
135std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
136 std::string_view ntupleName,
137 std::string_view storage,
138 const RNTupleReadOptions &options)
139{
140 return std::make_unique<RNTupleReader>(Detail::RPageSource::Create(ntupleName, storage, options));
141}
142
/// Factory: opens several ntuples through a single reader.
/// One page source is created per ROpenSpec (from its fNTupleName, fStorage and fOptions);
/// the collected sources are then passed to a Detail::RPageSourceFriends named "_friends",
/// which becomes the page source of the returned reader.
/// \param ntuples Specifications of the ntuples to open.
/// \return A reader constructed on top of the RPageSourceFriends page source.
143std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::OpenFriends(
144 std::span<ROpenSpec> ntuples)
145{
146 std::vector<std::unique_ptr<Detail::RPageSource>> sources;
 // Create the page sources in the order in which the specs were given.
147 for (const auto &n : ntuples) {
148 sources.emplace_back(Detail::RPageSource::Create(n.fNTupleName, n.fStorage, n.fOptions));
149 }
150 return std::make_unique<RNTupleReader>(std::make_unique<Detail::RPageSourceFriends>("_friends", sources));
151}
152
154{
155 if (!fModel) {
156 fModel = fSource->GetDescriptor().GenerateModel();
157 ConnectModel(*fModel);
158 }
159 return fModel.get();
160}
161
163{
164 // TODO(lesimon): In a later version, these variables may be defined by the user or the ideal width may be read out from the terminal.
165 char frameSymbol = '*';
166 int width = 80;
167 /*
168 if (width < 30) {
169 output << "The width is too small! Should be at least 30." << std::endl;
170 return;
171 }
172 */
173 std::string name = fSource->GetDescriptor().GetName();
174 switch (what) {
176 for (int i = 0; i < (width/2 + width%2 - 4); ++i)
177 output << frameSymbol;
178 output << " NTUPLE ";
179 for (int i = 0; i < (width/2 - 4); ++i)
180 output << frameSymbol;
181 output << std::endl;
182 // FitString defined in RFieldVisitor.cxx
183 output << frameSymbol << " N-Tuple : " << RNTupleFormatter::FitString(name, width-13) << frameSymbol << std::endl; // prints line with name of ntuple
184 output << frameSymbol << " Entries : " << RNTupleFormatter::FitString(std::to_string(GetNEntries()), width - 13) << frameSymbol << std::endl; // prints line with number of entries
185
186 // Traverses through all fields to gather information needed for printing.
187 RPrepareVisitor prepVisitor;
188 // Traverses through all fields to do the actual printing.
189 RPrintSchemaVisitor printVisitor(output);
190
191 // Note that we do not need to connect the model, we are only looking at its tree of fields
192 auto fullModel = fSource->GetDescriptor().GenerateModel();
193 fullModel->GetFieldZero()->AcceptVisitor(prepVisitor);
194
195 printVisitor.SetFrameSymbol(frameSymbol);
196 printVisitor.SetWidth(width);
197 printVisitor.SetDeepestLevel(prepVisitor.GetDeepestLevel());
198 printVisitor.SetNumFields(prepVisitor.GetNumFields());
199
200 for (int i = 0; i < width; ++i)
201 output << frameSymbol;
202 output << std::endl;
203 fullModel->GetFieldZero()->AcceptVisitor(printVisitor);
204 for (int i = 0; i < width; ++i)
205 output << frameSymbol;
206 output << std::endl;
207 break;
208 }
210 fSource->GetDescriptor().PrintInfo(output);
211 break;
213 fMetrics.Print(output);
214 break;
215 default:
216 // Unhandled case, internal error
217 R__ASSERT(false);
218 }
219}
220
221
223{
224 if (!fDisplayReader)
225 fDisplayReader = Clone();
226 return fDisplayReader.get();
227}
228
229
231{
232 RNTupleReader *reader = this;
233 REntry *entry = nullptr;
234 // Don't accidentally trigger loading of the entire model
235 if (fModel)
236 entry = fModel->GetDefaultEntry();
237
238 switch(format) {
240 reader = GetDisplayReader();
241 entry = reader->GetModel()->GetDefaultEntry();
242 // Fall through
244 if (!entry) {
245 output << "{}" << std::endl;
246 break;
247 }
248
249 reader->LoadEntry(index);
250 output << "{";
251 for (auto iValue = entry->begin(); iValue != entry->end(); ) {
252 output << std::endl;
253 RPrintValueVisitor visitor(*iValue, output, 1 /* level */);
254 iValue->GetField()->AcceptVisitor(visitor);
255
256 if (++iValue == entry->end()) {
257 output << std::endl;
258 break;
259 } else {
260 output << ",";
261 }
262 }
263 output << "}" << std::endl;
264 break;
265 default:
266 // Unhandled case, internal error
267 R__ASSERT(false);
268 }
269}
270
271
272//------------------------------------------------------------------------------
273
274
276 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
277 std::unique_ptr<ROOT::Experimental::Detail::RPageSink> sink)
278 : fSink(std::move(sink))
279 , fModel(std::move(model))
280 , fMetrics("RNTupleWriter")
281{
282 if (!fModel) {
283 throw RException(R__FAIL("null model"));
284 }
285 if (!fSink) {
286 throw RException(R__FAIL("null sink"));
287 }
288 fModel->Freeze();
289#ifdef R__USE_IMT
290 if (IsImplicitMTEnabled()) {
291 fZipTasks = std::make_unique<RNTupleImtTaskScheduler>();
292 fSink->SetTaskScheduler(fZipTasks.get());
293 }
294#endif
295 fSink->Create(*fModel.get());
296 fMetrics.ObserveMetrics(fSink->GetMetrics());
297
298 const auto &writeOpts = fSink->GetWriteOptions();
299 fMaxUnzippedClusterSize = writeOpts.GetMaxUnzippedClusterSize();
300 // First estimate is a factor 2 compression if compression is used at all
301 const int scale = writeOpts.GetCompression() ? 2 : 1;
302 fUnzippedClusterSizeEst = scale * writeOpts.GetApproxZippedClusterSize();
303}
304
306{
307 CommitCluster();
308 fSink->CommitDataset();
309}
310
/// Factory: creates a writer for the given model and storage location.
/// The page sink is obtained from Detail::RPageSink::Create(ntupleName, storage, options);
/// ownership of both the model and the new sink is handed to the RNTupleWriter.
/// \param model      Model describing the ntuple (moved into the writer).
/// \param ntupleName Name of the ntuple, forwarded to RPageSink::Create.
/// \param storage    Storage location, forwarded to RPageSink::Create.
/// \param options    Write options, forwarded to RPageSink::Create.
/// \return The newly created writer.
311std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Recreate(
312 std::unique_ptr<RNTupleModel> model,
313 std::string_view ntupleName,
314 std::string_view storage,
315 const RNTupleWriteOptions &options)
316{
317 return std::make_unique<RNTupleWriter>(std::move(model), Detail::RPageSink::Create(ntupleName, storage, options));
318}
319
/// Factory: creates a writer whose page sink is a Detail::RPageSinkFile constructed
/// from (ntupleName, file, options).
/// If options.GetUseBufferedWrite() is true, the file sink is wrapped in a
/// Detail::RPageSinkBuf and the writer is built on the wrapper; otherwise the writer
/// uses the file sink directly.
/// \param model      Model describing the ntuple (moved into the writer).
/// \param ntupleName Name of the ntuple, forwarded to the RPageSinkFile constructor.
/// \param file       TFile forwarded to the RPageSinkFile constructor.
/// \param options    Write options; also decides whether the sink is buffered.
/// \return The newly created writer.
320std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Append(
321 std::unique_ptr<RNTupleModel> model,
322 std::string_view ntupleName,
323 TFile &file,
324 const RNTupleWriteOptions &options)
325{
326 auto sink = std::make_unique<Detail::RPageSinkFile>(ntupleName, file, options);
327 if (options.GetUseBufferedWrite()) {
 // The buffered sink takes over ownership of the file sink.
328 auto bufferedSink = std::make_unique<Detail::RPageSinkBuf>(std::move(sink));
329 return std::make_unique<RNTupleWriter>(std::move(model), std::move(bufferedSink));
330 }
331 return std::make_unique<RNTupleWriter>(std::move(model), std::move(sink));
332}
333
334
336{
337 if (fNEntries == fLastCommitted) return;
338 for (auto& field : *fModel->GetFieldZero()) {
339 field.Flush();
340 field.CommitCluster();
341 }
342 fNBytesCommitted += fSink->CommitCluster(fNEntries);
343 fNBytesFilled += fUnzippedClusterSize;
344
345 // Cap the compression factor at 1000 to prevent overflow of fUnzippedClusterSizeEst
346 const float compressionFactor = std::min(1000.f,
347 static_cast<float>(fNBytesFilled) / static_cast<float>(fNBytesCommitted));
348 fUnzippedClusterSizeEst =
349 compressionFactor * static_cast<float>(fSink->GetWriteOptions().GetApproxZippedClusterSize());
350
351 fLastCommitted = fNEntries;
352 fUnzippedClusterSize = 0;
353}
354
355
356//------------------------------------------------------------------------------
357
358
360 : fOffset(0), fDefaultEntry(std::move(defaultEntry))
361{
362}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition: RError.hxx:291
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:118
char name[80]
Definition: TGX11.cxx:110
void SetOnDiskId(DescriptorId_t id)
Definition: RField.hxx:248
void ObserveMetrics(RNTupleMetrics &observee)
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RCollectionNTupleWriter(std::unique_ptr< REntry > defaultEntry)
Definition: RNTuple.cxx:359
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition: REntry.hxx:43
Base class for all ROOT issued exceptions.
Definition: RError.hxx:114
static std::string FitString(const std::string &str, int availableSpace)
void Reset() final
Start a new set of tasks.
Definition: RNTuple.cxx:48
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition: RNTuple.cxx:54
void Wait() final
Blocks until all scheduled tasks finished.
Definition: RNTuple.cxx:60
The RNTupleModel encapsulates the schema of an ntuple.
RFieldZero * GetFieldZero() const
Common user-tunable settings for reading ntuples.
An RNTuple that is used to read data from storage.
Definition: RNTuple.hxx:103
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples)
Open RNTuples as one virtual, horizontally combined ntuple.
Definition: RNTuple.cxx:143
RNTupleReader * GetDisplayReader()
Definition: RNTuple.cxx:222
static std::unique_ptr< RNTupleReader > Open(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:126
std::unique_ptr< Detail::RPageSource > fSource
Definition: RNTuple.hxx:109
void ConnectModel(const RNTupleModel &model)
Definition: RNTuple.cxx:70
RNTupleReader(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSource > source)
The user imposes an ntuple model, which must be compatible with the model found in the data on storag...
Definition: RNTuple.cxx:95
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition: RNTuple.hxx:238
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout)
Prints a detailed summary of the ntuple, including a list of fields.
Definition: RNTuple.cxx:162
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition: RNTuple.hxx:111
void Show(NTupleSize_t index, const ENTupleShowFormat format=ENTupleShowFormat::kCurrentModelJSON, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
Definition: RNTuple.cxx:230
Common user-tunable settings for storing ntuples.
void CommitCluster()
Ensure that the data from the so far seen Fill calls has been written to storage.
Definition: RNTuple.cxx:335
NTupleSize_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
Definition: RNTuple.hxx:373
RNTupleWriter(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSink > sink)
Throws an exception if the model or the sink is null.
Definition: RNTuple.cxx:275
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
Definition: RNTuple.hxx:371
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition: RNTuple.hxx:359
Detail::RNTupleMetrics fMetrics
Definition: RNTuple.hxx:360
std::unique_ptr< Detail::RPageSink > fSink
Definition: RNTuple.hxx:357
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:311
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition: RNTuple.hxx:356
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:320
Visitor used for a pre-processing run to collect information needed by another visitor class.
unsigned int GetDeepestLevel() const
Contains settings for printing and prints a summary of an RField instance.
Renders a JSON value corresponding to the field.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition: RNTuple.hxx:52
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:55
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition: RNTuple.hxx:61
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:92
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:150
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
Definition: file.py:1
static const char * what
Definition: stlLoader.cc:6
static void output(int code)
Definition: gifencode.c:226