Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleInspector.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleInspector.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2023-01-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleInspector
17#define ROOT7_RNTupleInspector
18
19#include <ROOT/RError.hxx>
21
22#include <TFile.h>
23#include <TH1D.h>
24#include <THStack.h>
25
26#include <cstdlib>
27#include <iostream>
28#include <memory>
29#include <numeric>
30#include <regex>
31#include <vector>
32
33namespace ROOT {
34namespace Experimental {
35
36class RNTuple;
37
38namespace Internal {
39class RPageSource;
40} // namespace Internal
41
44
45// clang-format off
46/**
47\class ROOT::Experimental::RNTupleInspector
48\ingroup NTuple
49\brief Inspect on-disk and storage-related information of an RNTuple.
50
51The RNTupleInspector can be used for studying an RNTuple in terms of its storage efficiency. It provides information on
52the level of the RNTuple itself, on the (sub)field level and on the column level.
53
54Example usage:
55
56~~~ {.cpp}
57#include <ROOT/RNTuple.hxx>
58#include <ROOT/RNTupleInspector.hxx>
59
60#include <iostream>
61
62using ROOT::Experimental::RNTuple;
63using ROOT::Experimental::RNTupleInspector;
64
65auto file = TFile::Open("data.rntuple");
66auto rntuple = file->Get<RNTuple>("NTupleName");
67auto inspector = RNTupleInspector::Create(rntuple);
68
69std::cout << "The compression factor is " << inspector->GetCompressionFactor()
70 << " using compression settings " << inspector->GetCompressionSettings()
71 << std::endl;
72~~~
73*/
74// clang-format on
76public:
77 /////////////////////////////////////////////////////////////////////////////
78 /// \brief Provides column-level storage information.
79 ///
80 /// The RColumnInspector class provides storage information for an individual column. This information is partly
81 /// collected during the construction of the RNTupleInspector object, and can partly be accessed using the
82 /// RColumnDescriptor that belongs to this column.
84 private:
86 const std::vector<std::uint64_t> fCompressedPageSizes = {};
87 std::uint32_t fElementSize = 0;
88 std::uint64_t fNElements = 0;
89
90 public:
91 RColumnInspector(const RColumnDescriptor &colDesc, const std::vector<std::uint64_t> &compressedPageSizes,
92 std::uint32_t elemSize, std::uint64_t nElems)
93 : fColumnDescriptor(colDesc),
94 fCompressedPageSizes(compressedPageSizes),
95 fElementSize(elemSize),
96 fNElements(nElems){};
97 ~RColumnInspector() = default;
98
100 const std::vector<std::uint64_t> &GetCompressedPageSizes() const { return fCompressedPageSizes; }
101 std::uint64_t GetNPages() const { return fCompressedPageSizes.size(); }
102 std::uint64_t GetCompressedSize() const
103 {
104 return std::accumulate(fCompressedPageSizes.begin(), fCompressedPageSizes.end(), 0);
105 }
106 std::uint64_t GetUncompressedSize() const { return fElementSize * fNElements; }
107 std::uint64_t GetElementSize() const { return fElementSize; }
108 std::uint64_t GetNElements() const { return fNElements; }
110 };
111
112 /////////////////////////////////////////////////////////////////////////////
113 /// \brief Provides field-level storage information.
114 ///
115 /// The RFieldTreeInspector class provides storage information for a field **and** its subfields. This information is
116 /// partly collected during the construction of the RNTupleInspector object, and can partly be accessed using
117 /// the RFieldDescriptor that belongs to this field.
119 private:
121 std::uint64_t fCompressedSize = 0;
122 std::uint64_t fUncompressedSize = 0;
123
124 public:
125 RFieldTreeInspector(const RFieldDescriptor &fieldDesc, std::uint64_t onDiskSize, std::uint64_t inMemSize)
126 : fRootFieldDescriptor(fieldDesc), fCompressedSize(onDiskSize), fUncompressedSize(inMemSize){};
128
130 std::uint64_t GetCompressedSize() const { return fCompressedSize; }
131 std::uint64_t GetUncompressedSize() const { return fUncompressedSize; }
132 };
133
134private:
135 std::unique_ptr<Internal::RPageSource> fPageSource;
136 std::unique_ptr<RNTupleDescriptor> fDescriptor;
138 std::uint64_t fCompressedSize = 0;
139 std::uint64_t fUncompressedSize = 0;
140
141 std::unordered_map<int, RColumnInspector> fColumnInfo;
142 std::unordered_map<int, RFieldTreeInspector> fFieldTreeInfo;
143
144 RNTupleInspector(std::unique_ptr<Internal::RPageSource> pageSource);
145
146 /////////////////////////////////////////////////////////////////////////////
147 /// \brief Gather column-level and RNTuple-level information.
148 ///
149 /// \note This method is called when the RNTupleInspector is initially created. This means that anything unexpected
150 /// about the RNTuple itself (e.g. inconsistent compression settings across clusters) will be detected here.
151 /// Therefore, any related exceptions will be thrown on creation of the inspector.
152 void CollectColumnInfo();
153
154 /////////////////////////////////////////////////////////////////////////////
155 /// \brief Recursively gather field-level information.
156 ///
157 /// \param[in] fieldId The ID of the field from which to start the recursive traversal. Typically this is the "zero
158 /// ID", i.e. the logical parent of all top-level fields.
159 ///
160 /// \return The RFieldTreeInspector for the provided field ID.
161 ///
162 /// This method is called when the RNTupleInspector is initially created.
164
165 /////////////////////////////////////////////////////////////////////////////
166 /// \brief Get the columns that make up the given field, including its subfields.
167 ///
168 /// \param [in] fieldId The ID of the field for which to collect the columns.
169 ///
170 /// \return A vector containing the IDs of all columns for the provided field ID.
171 std::vector<DescriptorId_t> GetColumnsByFieldId(DescriptorId_t fieldId) const;
172
173public:
174 RNTupleInspector(const RNTupleInspector &other) = delete;
178 ~RNTupleInspector() = default;
179
180 /////////////////////////////////////////////////////////////////////////////
181 /// \brief Create a new RNTupleInspector.
182 ///
183 /// \param[in] sourceNTuple A pointer to the RNTuple to be inspected.
184 ///
185 /// \return A pointer to the newly created RNTupleInspector.
186 ///
187 /// \note When this factory method is called, all required static information about the RNTuple's fields and
188 /// underlying columns is collected at once. This means that when any inconsistencies are encountered (e.g.
189 /// inconsistent compression across clusters), it will throw an error here.
190 static std::unique_ptr<RNTupleInspector> Create(RNTuple *sourceNTuple);
191
192 /////////////////////////////////////////////////////////////////////////////
193 /// \brief Create a new RNTupleInspector.
194 ///
195 /// \param[in] ntupleName The name of the RNTuple to be inspected.
196 /// \param[in] storage The path or URI to the RNTuple to be inspected.
197 ///
198 /// \see Create(RNTuple *sourceNTuple)
199 static std::unique_ptr<RNTupleInspector> Create(std::string_view ntupleName, std::string_view storage);
200
201 /////////////////////////////////////////////////////////////////////////////
202 /// \brief Get the descriptor for the RNTuple being inspected.
203 ///
204 /// \return A static copy of the RNTupleDescriptor belonging to the inspected RNTuple.
205 RNTupleDescriptor *GetDescriptor() const { return fDescriptor.get(); }
206
207 /////////////////////////////////////////////////////////////////////////////
208 /// \brief Get the compression settings of the RNTuple being inspected.
209 ///
210 /// \return The integer representation (\f$algorithm * 100 + level\f$, where \f$algorithm\f$ follows
211 /// ROOT::RCompressionSetting::EAlgorithm::EValues) of the compression settings used for the inspected RNTuple.
212 ///
213 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
214 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
216
217 /////////////////////////////////////////////////////////////////////////////
218 /// \brief Get a string describing compression settings of the RNTuple being inspected.
219 ///
220 /// \return A string describing the compression used for the inspected RNTuple. The format of the string is
221 /// `"A (level L)"`, where `A` is the name of the compression algorithm and `L` the compression level.
222 ///
223 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
224 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
225 std::string GetCompressionSettingsAsString() const;
226
227 /////////////////////////////////////////////////////////////////////////////
228 /// \brief Get the compressed, on-disk size of the RNTuple being inspected.
229 ///
230 /// \return The compressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
231 std::uint64_t GetCompressedSize() const { return fCompressedSize; }
232
233 /////////////////////////////////////////////////////////////////////////////
234 /// \brief Get the uncompressed total size of the RNTuple being inspected.
235 ///
236 /// \return The uncompressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
237 std::uint64_t GetUncompressedSize() const { return fUncompressedSize; }
238
239 /////////////////////////////////////////////////////////////////////////////
240 /// \brief Get the compression factor of the RNTuple being inspected.
241 ///
242 /// \return The compression factor of the inspected RNTuple.
243 ///
244 /// The compression factor shows how well the data present in the RNTuple is compressed by the compression settings
245 /// that were used. The compression factor is calculated as \f$size_{uncompressed} / size_{compressed}\f$.
246 float GetCompressionFactor() const { return (float)fUncompressedSize / (float)fCompressedSize; }
247
248 /////////////////////////////////////////////////////////////////////////////
249 /// \brief Get storage information for a given column.
250 ///
251 /// \param[in] physicalColumnId The physical ID of the column for which to get the information.
252 ///
253 /// \return The storage information for the provided column.
254 const RColumnInspector &GetColumnInspector(DescriptorId_t physicalColumnId) const;
255
256 /////////////////////////////////////////////////////////////////////////////
257 /// \brief Get the number of columns of a given type present in the RNTuple.
258 ///
259 /// \param[in] colType The column type to count, as defined by ROOT::Experimental::EColumnType.
260 ///
261 /// \return The number of columns present in the inspected RNTuple of the provided type.
262 size_t GetColumnCountByType(EColumnType colType) const;
263
264 /////////////////////////////////////////////////////////////////////////////
265 /// \brief Get the IDs of all columns with the given type.
266 ///
267 /// \param[in] colType The column type to collect, as defined by ROOT::Experimental::EColumnType.
268 ///
269 /// \return A vector containing the physical IDs of columns of the provided type.
270 const std::vector<DescriptorId_t> GetColumnsByType(EColumnType colType);
271
272 /////////////////////////////////////////////////////////////////////////////
273 /// \brief Get all column types present in the RNTuple being inspected.
274 ///
275 /// \return A vector containing all column types present in the RNTuple.
276 const std::vector<EColumnType> GetColumnTypes();
277
278 /////////////////////////////////////////////////////////////////////////////
279 /// \brief Print storage information per column type.
280 ///
281 /// \param[in] format Whether to print the information as a (markdown-parseable) table or in CSV format.
282 /// \param[in] output Where to write the output to. Default is `stdout`.
283 ///
284 /// The output includes for each column type its count, the total number of elements, the compressed size and the
285 /// uncompressed size.
286 ///
287 /// **Example: printing the column type information of an RNTuple as a table**
288 /// ~~~ {.cpp}
289 /// #include <ROOT/RNTupleInspector.hxx>
290 /// using ROOT::Experimental::RNTupleInspector;
291 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
292 ///
293 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
294 /// inspector->PrintColumnTypeInfo();
295 /// ~~~
296 /// Output:
297 /// ~~~
298 /// column type | count | # elements | compressed bytes | uncompressed bytes
299 /// ----------------|---------|-----------------|-------------------|--------------------
300 /// SplitIndex64 | 2 | 150 | 72 | 1200
301 /// SplitReal32 | 4 | 300 | 189 | 1200
302 /// SplitUInt32 | 3 | 225 | 123 | 900
303 /// ~~~
304 ///
305 /// **Example: printing the column type information of an RNTuple in CSV format**
306 /// ~~~ {.cpp}
307 /// #include <ROOT/RNTupleInspector.hxx>
308 /// using ROOT::Experimental::RNTupleInspector;
309 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
310 ///
311 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
312 /// inspector->PrintColumnTypeInfo(ENTupleInspectorPrintFormat::kCSV);
313 /// ~~~
314 /// Output:
315 /// ~~~
316 /// columnType,count,nElements,compressedSize,uncompressedSize
317 /// SplitIndex64,2,150,72,1200
318 /// SplitReal32,4,300,189,1200
319 /// SplitUInt32,3,225,123,900
320 /// ~~~
322 std::ostream &output = std::cout);
323
324 /////////////////////////////////////////////////////////////////////////////
325 /// \brief Get a histogram showing information for each column type present.
326 ///
327 /// \param[in] histKind Which type of information should be returned.
328 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
329 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
330 ///
331 /// \return A pointer to a `TH1D` containing the specified kind of information.
332 ///
333 /// Get a histogram showing the count, number of elements, size on disk, or size in memory for each column
334 /// type present in the inspected RNTuple.
335 std::unique_ptr<TH1D> GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName = "",
336 std::string_view histTitle = "");
337
338 /////////////////////////////////////////////////////////////////////////////
339 /// \brief Get a histogram containing the size distribution of the compressed pages for an individual column.
340 ///
341 /// \param[in] physicalColumnId The physical ID of the column for which to get the page size distribution.
342 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
343 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
344 /// \param[in] nBins The desired number of histogram bins.
345 ///
346 /// \return A pointer to a `TH1D` containing the page size distribution.
347 ///
348 /// The x-axis will range from the smallest page size, to the largest (inclusive).
349 std::unique_ptr<TH1D> GetPageSizeDistribution(DescriptorId_t physicalColumnId, std::string histName = "",
350 std::string histTitle = "", size_t nBins = 64);
351
352 /////////////////////////////////////////////////////////////////////////////
353 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given type.
354 ///
355 /// \param[in] colType The column type for which to get the size distribution, as defined by
356 /// ROOT::Experimental::EColumnType.
357 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
358 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
359 /// \param[in] nBins The desired number of histogram bins.
360 ///
361 /// \return A pointer to a `TH1D` containing the page size distribution.
362 ///
363 /// The x-axis will range from the smallest page size, to the largest (inclusive).
364 std::unique_ptr<TH1D> GetPageSizeDistribution(EColumnType colType, std::string histName = "",
365 std::string histTitle = "", size_t nBins = 64);
366
367 /////////////////////////////////////////////////////////////////////////////
368 /// \brief Get a histogram containing the size distribution of the compressed pages for a collection of columns.
369 ///
370 /// \param[in] colIds The physical IDs of the columns for which to get the page size distribution.
371 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
372 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
373 /// \param[in] nBins The desired number of histogram bins.
374 ///
375 /// \return A pointer to a `TH1D` containing the (cumulative) page size distribution.
376 ///
377 /// The x-axis will range from the smallest page size, to the largest (inclusive).
378 std::unique_ptr<TH1D> GetPageSizeDistribution(std::initializer_list<DescriptorId_t> colIds,
379 std::string histName = "", std::string histTitle = "",
380 size_t nBins = 64);
381
382 /////////////////////////////////////////////////////////////////////////////
383 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given list
384 /// of types.
385 ///
386 /// \param[in] colTypes The column types for which to get the size distribution, as defined by
387 /// ROOT::Experimental::EColumnType. The default is an empty vector, which indicates that the distribution for *all*
388 /// physical columns will be returned.
389 /// \param[in] histName The name of the histogram. An empty string means a default name will be used. The name of
390 /// each histogram inside the `THStack` will be `histName + colType`.
391 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
392 /// \param[in] nBins The desired number of histogram bins.
393 ///
394 /// \return A pointer to a `THStack` with one histogram for each column type.
395 ///
396 /// The x-axis will range from the smallest page size, to the largest (inclusive).
397 ///
398 /// **Example: Drawing a non-stacked page size distribution with a legend**
399 /// ~~~ {.cpp}
400 /// auto canvas = std::make_unique<TCanvas>();
401 /// auto inspector = RNTupleInspector::Create("myNTuple", "ntuple.root");
402 ///
403 /// // We want to show the page size distributions of columns with type `kSplitReal32` and `kSplitReal64`.
404 /// auto hist = inspector->GetPageSizeDistribution(
405 /// {ROOT::Experimental::EColumnType::kSplitReal32,
406 /// ROOT::Experimental::EColumnType::kSplitReal64});
407 /// // The "PLC" option automatically sets the line color for each histogram in the `THStack`.
408 /// // The "NOSTACK" option will draw the histograms on top of each other instead of stacked.
409 /// hist->DrawClone("PLC NOSTACK");
410 /// canvas->BuildLegend(0.7, 0.8, 0.89, 0.89);
411 /// canvas->DrawClone();
412 /// ~~~
413 std::unique_ptr<THStack> GetPageSizeDistribution(std::initializer_list<EColumnType> colTypes = {},
414 std::string histName = "", std::string histTitle = "",
415 size_t nBins = 64);
416
417 /////////////////////////////////////////////////////////////////////////////
418 /// \brief Get storage information for a given (sub)field by ID.
419 ///
420 /// \param[in] fieldId The ID of the (sub)field for which to get the information.
421 ///
422 /// \return The storage information inspector for the provided (sub)field tree.
423 const RFieldTreeInspector &GetFieldTreeInspector(DescriptorId_t fieldId) const;
424
425 /////////////////////////////////////////////////////////////////////////////
426 /// \brief Get a storage information inspector for a given (sub)field by name, including its subfields.
427 ///
428 /// \param[in] fieldName The name of the (sub)field for which to get the information.
429 ///
430 /// \return The storage information inspector for the provided (sub)field tree.
431 const RFieldTreeInspector &GetFieldTreeInspector(std::string_view fieldName) const;
432
433 /////////////////////////////////////////////////////////////////////////////
434 /// \brief Get the number of fields of a given type or class present in the RNTuple.
435 ///
436 /// \param[in] typeNamePattern The type or class name to count. May contain regular expression patterns for grouping
437 /// multiple kinds of types or classes.
438 /// \param[in] searchInSubFields If set to `false`, only top-level fields will be considered.
439 ///
440 /// \return The number of fields that matches the provided type.
441 size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubFields = true) const;
442
443 /////////////////////////////////////////////////////////////////////////////
444 /// \brief Get the number of fields of a given type or class present in the RNTuple.
445 ///
446 /// \see GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubFields) const
447 size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubFields = true) const
448 {
449 return GetFieldCountByType(std::regex{std::string(typeNamePattern)}, searchInSubFields);
450 }
451
452 /////////////////////////////////////////////////////////////////////////////
453 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
454 ///
455 /// \param[in] fieldNamePattern The name of the field to get. Because field names are unique by design,
456 /// providing a single field name will return a vector containing just the ID of that field. However, regular
457 /// expression patterns are supported in order to get the IDs of all fields whose names follow a certain structure.
458 /// \param[in] searchInSubFields If set to `false`, only top-level fields will be considered.
459 ///
460 /// \return A vector containing the IDs of fields that match the provided name.
461 const std::vector<DescriptorId_t>
462 GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields = true) const;
463
464 /////////////////////////////////////////////////////////////////////////////
465 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
466 ///
467 /// \see GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields) const
468 const std::vector<DescriptorId_t> GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubFields = true)
469 {
470 return GetFieldsByName(std::regex{std::string(fieldNamePattern)}, searchInSubFields);
471 }
472};
473} // namespace Experimental
474} // namespace ROOT
475
476#endif // ROOT7_RNTupleInspector
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
Meta-data stored for every column of an ntuple.
Meta-data stored for every field of an ntuple.
The on-storage meta-data of an ntuple.
Provides column-level storage information.
RColumnInspector(const RColumnDescriptor &colDesc, const std::vector< std::uint64_t > &compressedPageSizes, std::uint32_t elemSize, std::uint64_t nElems)
const std::vector< std::uint64_t > fCompressedPageSizes
const std::vector< std::uint64_t > & GetCompressedPageSizes() const
RFieldTreeInspector(const RFieldDescriptor &fieldDesc, std::uint64_t onDiskSize, std::uint64_t inMemSize)
Inspect on-disk and storage-related information of an RNTuple.
const RColumnInspector & GetColumnInspector(DescriptorId_t physicalColumnId) const
Get storage information for a given column.
float GetCompressionFactor() const
Get the compression factor of the RNTuple being inspected.
std::unique_ptr< TH1D > GetPageSizeDistribution(EColumnType colType, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for all columns of a given t...
const std::vector< DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
RNTupleInspector & operator=(RNTupleInspector &&other)=delete
std::unique_ptr< Internal::RPageSource > fPageSource
const std::vector< EColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
RNTupleInspector(const RNTupleInspector &other)=delete
std::uint64_t GetCompressedSize() const
Get the compressed, on-disk size of the RNTuple being inspected.
std::uint64_t GetUncompressedSize() const
Get the uncompressed total size of the RNTuple being inspected.
size_t GetColumnCountByType(EColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
RNTupleInspector(RNTupleInspector &&other)=delete
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
RNTupleDescriptor * GetDescriptor() const
Get the descriptor for the RNTuple being inspected.
static std::unique_ptr< RNTupleInspector > Create(RNTuple *sourceNTuple)
Create a new RNTupleInspector.
std::unique_ptr< TH1D > GetPageSizeDistribution(DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubFields=true) const
Get the number of fields of a given type or class present in the RNTuple.
const std::vector< DescriptorId_t > GetColumnsByType(EColumnType colType)
Get the IDs of all columns with the given type.
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
const std::vector< DescriptorId_t > GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubFields=true)
Get the IDs of (sub-)fields whose name matches the given string.
RFieldTreeInspector CollectFieldTreeInfo(DescriptorId_t fieldId)
Recursively gather field-level information.
std::unique_ptr< RNTupleDescriptor > fDescriptor
RNTupleInspector & operator=(const RNTupleInspector &other)=delete
std::vector< DescriptorId_t > GetColumnsByFieldId(DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
std::unordered_map< int, RFieldTreeInspector > fFieldTreeInfo
std::unique_ptr< THStack > GetPageSizeDistribution(std::initializer_list< EColumnType > colTypes={}, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for all columns of a given l...
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
int GetCompressionSettings() const
Get the compression settings of the RNTuple being inspected.
std::unordered_map< int, RColumnInspector > fColumnInfo
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubFields=true) const
Get the number of fields of a given type or class present in the RNTuple.
const RFieldTreeInspector & GetFieldTreeInspector(DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static void output()