Logo ROOT  
Reference Guide
RNTuple.cxx
Go to the documentation of this file.
1/// \file RNTuple.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RNTuple.hxx>
17
19#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RPageSinkBuf.hxx>
24#ifdef R__USE_IMT
25#include <ROOT/TTaskGroup.hxx>
26#endif
27
28#include <TBuffer.h>
29#include <TError.h>
30#include <TFile.h>
31#include <TROOT.h> // for IsImplicitMTEnabled()
32
33#include <algorithm>
34#include <exception>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <sstream>
39#include <string>
40#include <unordered_map>
41#include <utility>
42
43
44#ifdef R__USE_IMT
46{
47 Reset();
48}
49
51{
52 fTaskGroup = std::make_unique<TTaskGroup>();
53}
54
55
57{
58 fTaskGroup->Run(taskFunc);
59}
60
61
63{
64 fTaskGroup->Wait();
65}
66#endif
67
68
69//------------------------------------------------------------------------------
70
71
73 // We must not use the descriptor guard to prevent recursive locking in field.ConnectPageSource
74 model.GetFieldZero()->SetOnDiskId(fSource->GetSharedDescriptorGuard()->GetFieldZeroId());
75 for (auto &field : *model.GetFieldZero()) {
76 // If the model has been created from the descritor, the on-disk IDs are already set.
77 // User-provided models instead need to find their corresponding IDs in the descriptor.
78 if (field.GetOnDiskId() == kInvalidDescriptorId) {
79 field.SetOnDiskId(
80 fSource->GetSharedDescriptorGuard()->FindFieldId(field.GetName(), field.GetParent()->GetOnDiskId()));
81 }
82 field.ConnectPageSource(*fSource);
83 }
84}
85
87{
88#ifdef R__USE_IMT
89 if (IsImplicitMTEnabled()) {
90 fUnzipTasks = std::make_unique<RNTupleImtTaskScheduler>();
91 fSource->SetTaskScheduler(fUnzipTasks.get());
92 }
93#endif
94 fSource->Attach();
95 fMetrics.ObserveMetrics(fSource->GetMetrics());
96}
97
99 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
100 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
101 : fSource(std::move(source))
102 , fModel(std::move(model))
103 , fMetrics("RNTupleReader")
104{
105 if (!fSource) {
106 throw RException(R__FAIL("null source"));
107 }
108 if (!fModel) {
109 throw RException(R__FAIL("null model"));
110 }
111 fModel->Freeze();
114}
115
116ROOT::Experimental::RNTupleReader::RNTupleReader(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
117 : fSource(std::move(source))
118 , fModel(nullptr)
119 , fMetrics("RNTupleReader")
120{
121 if (!fSource) {
122 throw RException(R__FAIL("null source"));
123 }
125}
126
128
129std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
130 std::unique_ptr<RNTupleModel> model,
131 std::string_view ntupleName,
132 std::string_view storage,
133 const RNTupleReadOptions &options)
134{
135 return std::make_unique<RNTupleReader>(std::move(model), Detail::RPageSource::Create(ntupleName, storage, options));
136}
137
138std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
139 std::string_view ntupleName,
140 std::string_view storage,
141 const RNTupleReadOptions &options)
142{
143 return std::make_unique<RNTupleReader>(Detail::RPageSource::Create(ntupleName, storage, options));
144}
145
146std::unique_ptr<ROOT::Experimental::RNTupleReader>
148{
149 return std::make_unique<RNTupleReader>(ntuple->MakePageSource(options));
150}
151
152std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::OpenFriends(
153 std::span<ROpenSpec> ntuples)
154{
155 std::vector<std::unique_ptr<Detail::RPageSource>> sources;
156 for (const auto &n : ntuples) {
157 sources.emplace_back(Detail::RPageSource::Create(n.fNTupleName, n.fStorage, n.fOptions));
158 }
159 return std::make_unique<RNTupleReader>(std::make_unique<Detail::RPageSourceFriends>("_friends", sources));
160}
161
163{
164 if (!fModel) {
165 fModel = fSource->GetSharedDescriptorGuard()->GenerateModel();
166 ConnectModel(*fModel);
167 }
168 return fModel.get();
169}
170
172{
173 // TODO(lesimon): In a later version, these variables may be defined by the user or the ideal width may be read out from the terminal.
174 char frameSymbol = '*';
175 int width = 80;
176 /*
177 if (width < 30) {
178 output << "The width is too small! Should be at least 30." << std::endl;
179 return;
180 }
181 */
182 switch (what) {
184 std::string name;
185 std::unique_ptr<RNTupleModel> fullModel;
186 {
187 auto descriptorGuard = fSource->GetSharedDescriptorGuard();
188 name = descriptorGuard->GetName();
189 fullModel = descriptorGuard->GenerateModel();
190 }
191
192 for (int i = 0; i < (width/2 + width%2 - 4); ++i)
193 output << frameSymbol;
194 output << " NTUPLE ";
195 for (int i = 0; i < (width/2 - 4); ++i)
196 output << frameSymbol;
197 output << std::endl;
198 // FitString defined in RFieldVisitor.cxx
199 output << frameSymbol << " N-Tuple : " << RNTupleFormatter::FitString(name, width-13) << frameSymbol << std::endl; // prints line with name of ntuple
200 output << frameSymbol << " Entries : " << RNTupleFormatter::FitString(std::to_string(GetNEntries()), width - 13) << frameSymbol << std::endl; // prints line with number of entries
201
202 // Traverses through all fields to gather information needed for printing.
203 RPrepareVisitor prepVisitor;
204 // Traverses through all fields to do the actual printing.
205 RPrintSchemaVisitor printVisitor(output);
206
207 // Note that we do not need to connect the model, we are only looking at its tree of fields
208 fullModel->GetFieldZero()->AcceptVisitor(prepVisitor);
209
210 printVisitor.SetFrameSymbol(frameSymbol);
211 printVisitor.SetWidth(width);
212 printVisitor.SetDeepestLevel(prepVisitor.GetDeepestLevel());
213 printVisitor.SetNumFields(prepVisitor.GetNumFields());
214
215 for (int i = 0; i < width; ++i)
216 output << frameSymbol;
217 output << std::endl;
218 fullModel->GetFieldZero()->AcceptVisitor(printVisitor);
219 for (int i = 0; i < width; ++i)
220 output << frameSymbol;
221 output << std::endl;
222 break;
223 }
224 case ENTupleInfo::kStorageDetails: fSource->GetSharedDescriptorGuard()->PrintInfo(output); break;
226 fMetrics.Print(output);
227 break;
228 default:
229 // Unhandled case, internal error
230 R__ASSERT(false);
231 }
232}
233
234
236{
237 if (!fDisplayReader)
238 fDisplayReader = Clone();
239 return fDisplayReader.get();
240}
241
242
244{
245 RNTupleReader *reader = this;
246 REntry *entry = nullptr;
247 // Don't accidentally trigger loading of the entire model
248 if (fModel)
249 entry = fModel->GetDefaultEntry();
250
251 switch(format) {
253 reader = GetDisplayReader();
254 entry = reader->GetModel()->GetDefaultEntry();
255 // Fall through
257 if (!entry) {
258 output << "{}" << std::endl;
259 break;
260 }
261
262 reader->LoadEntry(index);
263 output << "{";
264 for (auto iValue = entry->begin(); iValue != entry->end(); ) {
265 output << std::endl;
266 RPrintValueVisitor visitor(*iValue, output, 1 /* level */);
267 iValue->GetField()->AcceptVisitor(visitor);
268
269 if (++iValue == entry->end()) {
270 output << std::endl;
271 break;
272 } else {
273 output << ",";
274 }
275 }
276 output << "}" << std::endl;
277 break;
278 default:
279 // Unhandled case, internal error
280 R__ASSERT(false);
281 }
282}
283
285{
286 auto descriptorGuard = fSource->GetSharedDescriptorGuard();
287 if (!fCachedDescriptor || fCachedDescriptor->GetGeneration() != descriptorGuard->GetGeneration())
288 fCachedDescriptor = descriptorGuard->Clone();
289 return fCachedDescriptor.get();
290}
291
292//------------------------------------------------------------------------------
293
294
296 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
297 std::unique_ptr<ROOT::Experimental::Detail::RPageSink> sink)
298 : fSink(std::move(sink))
299 , fModel(std::move(model))
300 , fMetrics("RNTupleWriter")
301{
302 if (!fModel) {
303 throw RException(R__FAIL("null model"));
304 }
305 if (!fSink) {
306 throw RException(R__FAIL("null sink"));
307 }
308 fModel->Freeze();
309#ifdef R__USE_IMT
310 if (IsImplicitMTEnabled()) {
311 fZipTasks = std::make_unique<RNTupleImtTaskScheduler>();
312 fSink->SetTaskScheduler(fZipTasks.get());
313 }
314#endif
315 fSink->Create(*fModel.get());
316 fMetrics.ObserveMetrics(fSink->GetMetrics());
317
318 const auto &writeOpts = fSink->GetWriteOptions();
319 fMaxUnzippedClusterSize = writeOpts.GetMaxUnzippedClusterSize();
320 // First estimate is a factor 2 compression if compression is used at all
321 const int scale = writeOpts.GetCompression() ? 2 : 1;
322 fUnzippedClusterSizeEst = scale * writeOpts.GetApproxZippedClusterSize();
323}
324
326{
327 CommitCluster(true /* commitClusterGroup */);
328 fSink->CommitDataset();
329}
330
331std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Recreate(
332 std::unique_ptr<RNTupleModel> model,
333 std::string_view ntupleName,
334 std::string_view storage,
335 const RNTupleWriteOptions &options)
336{
337 return std::make_unique<RNTupleWriter>(std::move(model), Detail::RPageSink::Create(ntupleName, storage, options));
338}
339
340std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Append(
341 std::unique_ptr<RNTupleModel> model,
342 std::string_view ntupleName,
343 TFile &file,
344 const RNTupleWriteOptions &options)
345{
346 auto sink = std::make_unique<Detail::RPageSinkFile>(ntupleName, file, options);
347 if (options.GetUseBufferedWrite()) {
348 auto bufferedSink = std::make_unique<Detail::RPageSinkBuf>(std::move(sink));
349 return std::make_unique<RNTupleWriter>(std::move(model), std::move(bufferedSink));
350 }
351 return std::make_unique<RNTupleWriter>(std::move(model), std::move(sink));
352}
353
355{
356 if (fNEntries == fLastCommittedClusterGroup)
357 return;
358 fSink->CommitClusterGroup();
359 fLastCommittedClusterGroup = fNEntries;
360}
361
363{
364 if (fNEntries == fLastCommitted) {
365 if (commitClusterGroup)
366 CommitClusterGroup();
367 return;
368 }
369 for (auto& field : *fModel->GetFieldZero()) {
370 field.Flush();
371 field.CommitCluster();
372 }
373 fNBytesCommitted += fSink->CommitCluster(fNEntries);
374 fNBytesFilled += fUnzippedClusterSize;
375
376 // Cap the compression factor at 1000 to prevent overflow of fUnzippedClusterSizeEst
377 const float compressionFactor = std::min(1000.f,
378 static_cast<float>(fNBytesFilled) / static_cast<float>(fNBytesCommitted));
379 fUnzippedClusterSizeEst =
380 compressionFactor * static_cast<float>(fSink->GetWriteOptions().GetApproxZippedClusterSize());
381
382 fLastCommitted = fNEntries;
383 fUnzippedClusterSize = 0;
384
385 if (commitClusterGroup)
386 CommitClusterGroup();
387}
388
389
390//------------------------------------------------------------------------------
391
392
394 : fOffset(0), fDefaultEntry(std::move(defaultEntry))
395{
396}
397
398//------------------------------------------------------------------------------
399
401{
402 static TClassRef RNTupleAnchorClass("ROOT::Experimental::Internal::RFileNTupleAnchor");
403
404 if (buf.IsReading()) {
405 RNTupleAnchorClass->ReadBuffer(buf, static_cast<Internal::RFileNTupleAnchor *>(this));
406 R__ASSERT(buf.GetParent() && buf.GetParent()->InheritsFrom("TFile"));
407 fFile = reinterpret_cast<TFile *>(buf.GetParent());
408 } else {
409 RNTupleAnchorClass->WriteBuffer(buf, static_cast<Internal::RFileNTupleAnchor *>(this));
410 }
411}
412
413std::unique_ptr<ROOT::Experimental::Detail::RPageSource>
415{
416 if (!fFile)
417 throw RException(R__FAIL("This RNTuple object was not streamed from a file"));
418
419 // TODO(jblomer): Add RRawFile factory that create a raw file from a TFile. This may then duplicate the file
420 // descriptor (to avoid re-open). There could also be a raw file that uses a TFile as a "backend" for TFile cases
421 // that are unsupported by raw file.
422 auto path = fFile->GetEndpointUrl()->GetFile();
423 return Detail::RPageSourceFile::CreateFromAnchor(GetAnchor(), path, options);
424}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition: RError.hxx:291
#define R__ASSERT(e)
Definition: TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
Option_t Option_t width
char name[80]
Definition: TGX11.cxx:110
void SetOnDiskId(DescriptorId_t id)
Definition: RField.hxx:247
void ObserveMetrics(RNTupleMetrics &observee)
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page source from the file name (location)
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const Internal::RFileNTupleAnchor &anchor, std::string_view path, const RNTupleReadOptions &options)
Used from the RNTuple class to build a datasource if the anchor is already available.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RCollectionNTupleWriter(std::unique_ptr< REntry > defaultEntry)
Definition: RNTuple.cxx:393
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition: REntry.hxx:43
Base class for all ROOT issued exceptions.
Definition: RError.hxx:114
The on-storage meta-data of an ntuple.
std::unique_ptr< RNTupleDescriptor > Clone() const
static std::string FitString(const std::string &str, int availableSpace)
void Reset() final
Start a new set of tasks.
Definition: RNTuple.cxx:50
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition: RNTuple.cxx:56
void Wait() final
Blocks until all scheduled tasks finished.
Definition: RNTuple.cxx:62
The RNTupleModel encapulates the schema of an ntuple.
RFieldZero * GetFieldZero() const
Common user-tunable settings for reading ntuples.
An RNTuple that is used to read data from storage.
Definition: RNTuple.hxx:110
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples)
Open RNTuples as one virtual, horizontally combined ntuple.
Definition: RNTuple.cxx:152
RNTupleReader * GetDisplayReader()
Definition: RNTuple.cxx:235
const RNTupleDescriptor * GetDescriptor()
Returns a cached copy of the page source descriptor.
Definition: RNTuple.cxx:284
static std::unique_ptr< RNTupleReader > Open(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:129
std::unique_ptr< Detail::RPageSource > fSource
Definition: RNTuple.hxx:116
void ConnectModel(const RNTupleModel &model)
Definition: RNTuple.cxx:72
RNTupleReader(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSource > source)
The user imposes an ntuple model, which must be compatible with the model found in the data on storag...
Definition: RNTuple.cxx:98
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition: RNTuple.hxx:256
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout)
Prints a detailed summary of the ntuple, including a list of fields.
Definition: RNTuple.cxx:171
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition: RNTuple.hxx:118
void Show(NTupleSize_t index, const ENTupleShowFormat format=ENTupleShowFormat::kCurrentModelJSON, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
Definition: RNTuple.cxx:243
Common user-tunable settings for storing ntuples.
NTupleSize_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
Definition: RNTuple.hxx:390
RNTupleWriter(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSink > sink)
Throws an exception if the model or the sink is null.
Definition: RNTuple.cxx:295
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
Definition: RNTuple.hxx:388
void CommitCluster(bool commitClusterGroup=false)
Ensure that the data from the so far seen Fill calls has been written to storage.
Definition: RNTuple.cxx:362
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition: RNTuple.hxx:375
Detail::RNTupleMetrics fMetrics
Definition: RNTuple.hxx:376
std::unique_ptr< Detail::RPageSink > fSink
Definition: RNTuple.hxx:373
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:331
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition: RNTuple.hxx:372
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition: RNTuple.cxx:340
Representation of an RNTuple data set in a ROOT file.
Definition: RNTuple.hxx:491
std::unique_ptr< Detail::RPageSource > MakePageSource(const RNTupleReadOptions &options=RNTupleReadOptions())
Create a page source from the RNTuple object.
Definition: RNTuple.cxx:414
void Streamer(TBuffer &)
Definition: RNTuple.cxx:400
Visitor used for a pre-processing run to collect information needed by another visitor class.
unsigned int GetDeepestLevel() const
Contains settings for printing and prints a summary of an RField instance.
Renders a JSON value corresponding to the field.
Buffer base class used for serializing objects.
Definition: TBuffer.h:43
TObject * GetParent() const
Return pointer to parent of this buffer.
Definition: TBuffer.cxx:262
Bool_t IsReading() const
Definition: TBuffer.h:86
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:28
Int_t ReadBuffer(TBuffer &b, void *pointer, Int_t version, UInt_t start, UInt_t count)
Function called by the Streamer functions to deserialize information from buffer b into object at p.
Definition: TClass.cxx:6747
Int_t WriteBuffer(TBuffer &b, void *pointer, const char *info="")
Function called by the Streamer functions to serialize object at p to buffer b.
Definition: TClass.cxx:6768
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:449
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::GetInfo()
Definition: RNTuple.hxx:59
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:47
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition: RNTuple.hxx:68
constexpr DescriptorId_t kInvalidDescriptorId
Definition: RNTupleUtil.hxx:84
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
Definition: file.py:1
static const char * what
Definition: stlLoader.cc:6
Entry point for an RNTuple in a ROOT file.
Definition: RMiniFile.hxx:65
static void output()