Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RMiniFile.hxx
Go to the documentation of this file.
1/// \file ROOT/RMiniFile.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-12-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RMiniFile
17#define ROOT7_RMiniFile
18
19#include <ROOT/RError.hxx>
20#include <ROOT/RNTuple.hxx>
22#include <ROOT/RSpan.hxx>
23#include <string_view>
24
25#include <cstdint>
26#include <cstdio>
27#include <memory>
28#include <string>
29
30class TCollection;
31class TFile;
32class TFileMergeInfo;
34
35namespace ROOT {
36
37namespace Internal {
38class RRawFile;
39}
40
41namespace Experimental {
42
43class RNTupleWriteOptions;
44
45namespace Internal {
46/// Holds status information of an open ROOT file during writing
47struct RTFileControlBlock;
48
49// clang-format off
50/**
51\class ROOT::Experimental::Internal::RMiniFileReader
52\ingroup NTuple
53\brief Read RNTuple data blocks from a TFile container, provided by a RRawFile
54
55A RRawFile is used for the byte access. The class implements a minimal subset of TFile, enough to extract
56RNTuple data keys.
57*/
58// clang-format on
60private:
61 /// The raw file used to read byte ranges
63 /// Indicates whether the file is a TFile container or an RNTuple bare file
64 bool fIsBare = false;
65 /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > fMaxKeySize`, it will assume the
66 /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to
67 /// what happens in `RNTupleFileWriter::WriteBlob()`.
68 std::uint64_t fMaxKeySize = 0;
69
70 /// Used when the file container turns out to be a bare file
71 RResult<RNTuple> GetNTupleBare(std::string_view ntupleName);
72 /// Used when the file turns out to be a TFile container
73 RResult<RNTuple> GetNTupleProper(std::string_view ntupleName);
74
75 RNTuple CreateAnchor(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
76 std::uint16_t versionPatch, std::uint64_t seekHeader, std::uint64_t nbytesHeader,
77 std::uint64_t lenHeader, std::uint64_t seekFooter, std::uint64_t nbytesFooter,
78 std::uint64_t lenFooter, std::uint64_t maxKeySize);
79
80public:
81 RMiniFileReader() = default;
82 /// Uses the given raw file to read byte ranges
84 /// Extracts header and footer location for the RNTuple identified by ntupleName
85 RResult<RNTuple> GetNTuple(std::string_view ntupleName);
86 /// Reads a given byte range from the file into the provided memory buffer.
87 /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
88 /// whose addresses are listed at the end of the first chunk.
89 void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset);
90
91 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; } ///< Returns fMaxKeySize, the size above which ReadBuffer treats a blob as chunked (0 by default)
92};
93
94// clang-format off
95/**
96\class ROOT::Experimental::Internal::RNTupleFileWriter
97\ingroup NTuple
98\brief Write RNTuple data blocks in a TFile or a bare file container
99
100The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile.
101Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class.
102Updating an existing TFile requires a proper TFile object. Also, writing a remote file requires a proper TFile object.
103A stand-alone version of RNTuple can remove the TFile-based writer.
104*/
105// clang-format on
107private:
108 struct RFileProper {
109 TFile *fFile = nullptr; ///< The TFile written to; nullptr by default (NOTE(review): presumably set when appending to an existing TFile - confirm)
110 /// Low-level writing using a TFile (NOTE(review): presumably writes `nbytes` bytes from `buffer` at file position `offset` - confirm)
111 void Write(const void *buffer, size_t nbytes, std::int64_t offset);
112 /// Writes an RBlob opaque key with the provided buffer as data record and returns the offset of the record.
113 std::uint64_t WriteKey(const void *buffer, size_t nbytes, size_t len);
114 operator bool() const { return fFile; } ///< True if fFile is non-null
115 };
116
117 struct RFileSimple {
118 /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum
119 /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB.
120 static constexpr int kBlockAlign = 4096;
121 /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at
122 /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice,
123 /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be
124 /// aligned to kBlockAlign, so the header block is sized to one full alignment unit (4096 bytes).
125 static constexpr std::size_t kHeaderBlockSize = 4096;
126 /// Size in bytes of the write block. Testing suggests that 4MiB gives best performance at a reasonable memory consumption.
127 static constexpr std::size_t kBlockSize = 4 * 1024 * 1024;
128
129 // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete.
130 unsigned char *fHeaderBlock; ///< No in-class initializer (NOTE(review): presumably allocated in RFileSimple() and released in ~RFileSimple() - confirm)
131 std::uint64_t fBlockOffset = 0; ///< Starts at 0 (NOTE(review): presumably the file offset that fBlock currently maps to - confirm)
132 unsigned char *fBlock; ///< No in-class initializer (NOTE(review): presumably allocated in RFileSimple() and released in ~RFileSimple() - confirm)
133
134 /// For the simplest cases, a C file stream can be used for writing
135 FILE *fFile = nullptr;
136 /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
137 bool fDirectIO = false;
138 /// Keeps track of the seek offset
139 std::uint64_t fFilePos = 0;
140 /// Keeps track of the next key offset
141 std::uint64_t fKeyOffset = 0;
142 /// Keeps track of TFile control structures, which need to be updated on committing the data set
143 std::unique_ptr<ROOT::Experimental::Internal::RTFileControlBlock> fControlBlock;
144
145 RFileSimple();
146 RFileSimple(const RFileSimple &other) = delete; // copy construction is disabled
147 RFileSimple(RFileSimple &&other) = delete; // move construction is disabled
148 RFileSimple &operator=(const RFileSimple &other) = delete; // copy assignment is disabled
150 ~RFileSimple();
151
152 void Flush(); // NOTE(review): presumably writes out buffered block data to fFile - confirm against the implementation
153
154 /// Writes bytes in the open stream, either at fFilePos or at the given offset
155 /// (NOTE(review): the default offset of -1 presumably selects fFilePos - confirm)
156 void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1);
157 /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload.
158 /// The payload is already compressed
159 std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1,
160 std::uint64_t directoryOffset = 100, const std::string &className = "",
161 const std::string &objectName = "", const std::string &title = "");
162 operator bool() const { return fFile; } ///< True if fFile is non-null
163 };
163
164 // TODO(jblomer): wrap in an std::variant with C++17
165 /// For updating existing files and for storing more than just an RNTuple in the file
167 /// For simple use cases, survives without libRIO dependency
169 /// A simple file can either be written as TFile container or as NTuple bare file
170 bool fIsBare = false;
171 /// The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple
172 /// writers can operate on the same file if (and only if) they use a proper TFile object for writing.
173 std::string fNTupleName;
174 /// The file name without parent directory; only required when writing with a C file stream
175 std::string fFileName;
176 /// Header and footer location of the ntuple, written on Commit()
178 /// Set of streamer info records that should be written to the file.
179 /// The RNTuple class description is always present.
181
182 explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize);
183
184 /// For a TFile container written by a C file stream, write the header and TFile object
185 void WriteTFileSkeleton(int defaultCompression);
186 /// The only key that will be visible in file->ls()
187 void WriteTFileNTupleKey();
188 /// Write the TList with the RNTuple key
189 void WriteTFileKeysList();
190 /// Write the compressed streamer info record with the description of the RNTuple class
192 /// Last record in the file
193 void WriteTFileFreeList();
194 /// For a bare file, which is necessarily written by a C file stream, write file header
195 void WriteBareFileSkeleton(int defaultCompression);
196
197public:
198 /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file
199 enum class EContainerFormat {
200 kTFile, ///< ROOT TFile container
201 kBare, ///< A thin envelope supporting a single RNTuple only
202 };
203
204 /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
205 /// Uses a C stream for writing
206 static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path,
207 EContainerFormat containerFormat,
208 const RNTupleWriteOptions &options);
209 /// Add a new RNTuple identified by ntupleName to the existing TFile.
210 static std::unique_ptr<RNTupleFileWriter> Append(std::string_view ntupleName, TFile &file, std::uint64_t maxKeySize);
211
212 RNTupleFileWriter(const RNTupleFileWriter &other) = delete;
217
218 /// Writes the compressed header and registers its location; lenHeader is the size of the uncompressed header.
219 std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader);
220 /// Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed footer.
221 std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter);
222 /// Writes a new record as an RBlob key into the file
223 std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len);
224 /// Reserves a new record as an RBlob key in the file.
225 std::uint64_t ReserveBlob(size_t nbytes, size_t len);
226 /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the
227 /// previously reserved key.
228 void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
229 /// Ensures that the streamer info records passed as argument are written to the file
231 /// Writes the RNTuple key to the file so that the header and footer keys can be found
232 void Commit();
233};
234
235} // namespace Internal
236} // namespace Experimental
237} // namespace ROOT
238
239#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
char name[80]
Definition TGX11.cxx:110
Read RNTuple data blocks from a TFile container, provided by a RRawFile.
Definition RMiniFile.hxx:59
bool fIsBare
Indicates whether the file is a TFile container or an RNTuple bare file.
Definition RMiniFile.hxx:64
RResult< RNTuple > GetNTuple(std::string_view ntupleName)
Extracts header and footer location for the RNTuple identified by ntupleName.
RResult< RNTuple > GetNTupleProper(std::string_view ntupleName)
Used when the file turns out to be a TFile container.
std::uint64_t fMaxKeySize
If fMaxKeySize > 0 and ReadBuffer attempts to read nbytes > fMaxKeySize, it will assume the blob being...
Definition RMiniFile.hxx:68
RNTuple CreateAnchor(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch, std::uint64_t seekHeader, std::uint64_t nbytesHeader, std::uint64_t lenHeader, std::uint64_t seekFooter, std::uint64_t nbytesFooter, std::uint64_t lenFooter, std::uint64_t maxKeySize)
ROOT::Internal::RRawFile * fRawFile
The raw file used to read byte ranges.
Definition RMiniFile.hxx:62
RResult< RNTuple > GetNTupleBare(std::string_view ntupleName)
Used when the file container turns out to be a bare file.
void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Reads a given byte range from the file into the provided memory buffer.
Write RNTuple data blocks in a TFile or a bare file container.
std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len)
Writes a new record as an RBlob key into the file.
std::string fNTupleName
The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple wr...
std::string fFileName
The file name without parent directory; only required when writing with a C file stream.
std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed f...
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TFile &file, std::uint64_t maxKeySize)
Add a new RNTuple identified by ntupleName to the existing TFile.
void WriteTFileKeysList()
Write the TList with the RNTuple key.
void UpdateStreamerInfos(const RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
Ensures that the streamer info records passed as argument are written to the file.
void Commit()
Writes the RNTuple key to the file so that the header and footer keys can be found.
RFileProper fFileProper
For updating existing files and for storing more than just an RNTuple in the file.
std::uint64_t ReserveBlob(size_t nbytes, size_t len)
Reserves a new record as an RBlob key in the file.
RFileSimple fFileSimple
For simple use cases, survives without libRIO dependency.
RNTupleFileWriter(const RNTupleFileWriter &other)=delete
void WriteTFileNTupleKey()
The only key that will be visible in file->ls()
void WriteTFileFreeList()
Last record in the file.
EContainerFormat
For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file.
RNTupleFileWriter & operator=(const RNTupleFileWriter &other)=delete
bool fIsBare
A simple file can either be written as TFile container or as NTuple bare file.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
void WriteTFileSkeleton(int defaultCompression)
For a TFile container written by a C file stream, write the header and TFile object.
void WriteBareFileSkeleton(int defaultCompression)
For a bare file, which is necessarily written by a C file stream, write file header.
void WriteTFileStreamerInfo()
Write the compressed streamer info record with the description of the RNTuple class.
std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
Writes the compressed header and registers its location; lenHeader is the size of the uncompressed h...
RNTuple fNTupleAnchor
Header and footer location of the ntuple, written on Commit()
RNTupleFileWriter(RNTupleFileWriter &&other)=delete
RNTupleFileWriter & operator=(RNTupleFileWriter &&other)=delete
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
Write into a reserved record; the caller is responsible for making sure that the written byte range i...
RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap
Set of streamer info records that should be written to the file.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Common user-tunable settings for storing ntuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Collection abstract base class.
Definition TCollection.h:65
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
Abstract Interface class describing Streamer information for one class.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t WriteKey(const void *buffer, size_t nbytes, size_t len)
Writes an RBlob opaque key with the provided buffer as data record and returns the offset of the reco...
static constexpr std::size_t kBlockSize
Testing suggests that 4MiB gives best performance at a reasonable memory consumption.
bool fDirectIO
Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
std::unique_ptr< ROOT::Experimental::Internal::RTFileControlBlock > fControlBlock
Keeps track of TFile control structures, which need to be updated on committing the data set.
FILE * fFile
For the simplest cases, a C file stream can be used for writing.
RFileSimple & operator=(const RFileSimple &other)=delete
std::uint64_t fKeyOffset
Keeps track of the next key offset.
static constexpr int kBlockAlign
Direct I/O requires that all buffers and write lengths are aligned.
void Write(const void *buffer, size_t nbytes, std::int64_t offset=-1)
Writes bytes in the open stream, either at fFilePos or at the given offset.
static constexpr std::size_t kHeaderBlockSize
During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at fSeek...
std::uint64_t fFilePos
Keeps track of the seek offset.
std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset=-1, std::uint64_t directoryOffset=100, const std::string &className="", const std::string &objectName="", const std::string &title="")
Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the ...
RFileSimple & operator=(RFileSimple &&other)=delete