Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RRawFile.hxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_RRawFile
13#define ROOT_RRawFile
14
15#include <string_view>
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24
25/**
26 * \class RRawFile RRawFile.hxx
27 * \ingroup IO
28 *
29 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32 * RDataSource implementations and for RNTuple.
33 *
34 * Files are addressed by a URL consisting of a transport protocol part and a location, e.g. file:///path/to/data.
35 * If the transport protocol part and the :// separator are missing, the protocol defaults to local file. Files are
36 * opened when required (on reading, getting file size) and closed on object destruction.
37 *
38 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39 *
40 * RRawFile objects are conditionally thread safe. See the user manual for further details:
41 * https://root.cern/manual/thread_safety/
42 */
43class RRawFile {
44public:
45 /// kAuto detects the line break from the first line, kSystem picks the system's default
47
48 /// On construction, an ROptions parameter can customize the RRawFile behavior
49 struct ROptions {
50 static constexpr size_t kUseDefaultBlockSize = std::size_t(-1); ///< Use protocol-dependent default block size
51
53 /// Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering.
55 // Define an empty constructor to work around a bug in Clang: https://github.com/llvm/llvm-project/issues/36032
57 };
58
59 /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
60 /// single byte range from disk into multiple buffers.
61 struct RIOVec {
62 /// The destination for reading
63 void *fBuffer = nullptr;
64 /// The file offset
65 std::uint64_t fOffset = 0;
66 /// The number of desired bytes
67 std::size_t fSize = 0;
68 /// The number of actually read bytes, set by ReadV()
69 std::size_t fOutBytes = 0;
70 };
71
72 /// Implementations may enforce limits on the use of vector reads. These limits can depend on the server or
73 /// the specific file opened and can be queried per RRawFile object through GetReadVLimits().
74 /// Note that due to such limits, a vector read with a single request can behave differently from a Read() call.
75 struct RIOVecLimits {
76 /// Maximum number of elements in a ReadV request vector
77 std::size_t fMaxReqs = static_cast<std::size_t>(-1);
78 /// Maximum size in bytes of any single request in the request vector
79 std::size_t fMaxSingleSize = static_cast<std::size_t>(-1);
80 /// Maximum size in bytes of the sum of requests in the vector
81 std::uint64_t fMaxTotalSize = static_cast<std::uint64_t>(-1);
82
83 bool HasReqsLimit() const { return fMaxReqs != static_cast<std::size_t>(-1); } ///< True if fMaxReqs differs from its default of size_t(-1)
84 bool HasSizeLimit() const ///< True if fMaxSingleSize or fMaxTotalSize differs from its default of -1
85 {
86 return fMaxSingleSize != static_cast<std::size_t>(-1) || fMaxTotalSize != static_cast<std::uint64_t>(-1);
87 }
88 };
89
90private:
91 /// Don't change without adapting ReadAt()
92 static constexpr unsigned int kNumBlockBuffers = 2;
93 struct RBlockBuffer {
94 /// Where in the open file does fBuffer start
95 std::uint64_t fBufferOffset = 0;
96 /// The number of currently buffered bytes in fBuffer
97 size_t fBufferSize = 0;
98 /// Points into the I/O buffer with data from the file, not owned.
99 unsigned char *fBuffer = nullptr;
100
101 RBlockBuffer() = default;
102 RBlockBuffer(const RBlockBuffer &) = delete; ///< Block buffers cannot be copy-constructed
104 ~RBlockBuffer() = default;
105
106 /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns the number of bytes copied.
107 size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
108 };
109 /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
110 unsigned int fBlockBufferIdx = 0;
111 /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
113 /// Memory block containing the block buffers consecutively
114 std::unique_ptr<unsigned char[]> fBufferSpace;
115 /// Used as a marker that the file size was not yet queried
116 static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
117 /// The cached file size
119 /// Files are opened lazily and only when required; the open state is kept by this flag
120 bool fIsOpen = false;
121 /// Runtime switch to decide if reads are buffered or directly sent to ReadAtImpl()
122 bool fIsBuffering = true;
123
124protected:
125 std::string fUrl; ///< The URL of the file
127 /// The current position in the file, which can be changed by Seek, Read, and Readln
128 std::uint64_t fFilePos = 0;
129
130 /// OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
131 /// fOptions.fBlockSize is still kUseDefaultBlockSize, derived classes are responsible for setting a sensible value.
132 /// After a call to OpenImpl(), fOptions.fBlockSize must be a concrete block size (zero turns off buffering).
133 virtual void OpenImpl() = 0;
134 /// Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
135 /// therefore derived classes should return nbytes bytes if available.
136 virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
137 /// Derived classes should return the file size
138 virtual std::uint64_t GetSizeImpl() = 0;
139
140 /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations
141 virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
142
143 /// Open the file if not already open. Otherwise noop.
144 void EnsureOpen();
145
146public:
147 RRawFile(std::string_view url, ROptions options);
148 RRawFile(const RRawFile &) = delete; ///< Non-copyable; use Clone() to access the same resource from a second object
149 RRawFile &operator=(const RRawFile &) = delete;
150 virtual ~RRawFile() = default;
151
152 /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
153 virtual std::unique_ptr<RRawFile> Clone() const = 0;
154
155 /// Factory method that returns a suitable concrete implementation according to the transport in the url
156 static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
157 /// Returns only the file location, e.g. "server/file" for http://server/file
158 static std::string GetLocation(std::string_view url);
159 /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
160 static std::string GetTransport(std::string_view url);
161
162 /// Buffered read from a random position. Returns the actual number of bytes read.
163 /// Short reads indicate the end of the file
164 size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
165 /// Read from fFilePos offset. Returns the actual number of bytes read.
166 size_t Read(void *buffer, size_t nbytes);
167 /// Change the cursor fFilePos
168 void Seek(std::uint64_t offset);
169 /// Returns the offset for the next Read/Readln call
170 std::uint64_t GetFilePos() const { return fFilePos; }
171 /// Returns the size of the file
172 std::uint64_t GetSize();
173 /// Returns the url of the file
174 std::string GetUrl() const;
175
176 /// Opens the file if necessary and calls ReadVImpl
177 void ReadV(RIOVec *ioVec, unsigned int nReq);
178 /// Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
180
181 /// Turn off buffered reads; all scalar read requests go directly to the implementation. Buffering can be turned
182 /// back on.
183 void SetBuffering(bool value);
184 bool IsBuffering() const { return fIsBuffering; } ///< Returns the current value of the buffering switch fIsBuffering
185
186 /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
187 bool Readln(std::string &line);
188
189 /// Once opened, the file stays open until destruction of the RRawFile object
190 bool IsOpen() const { return fIsOpen; }
191}; // class RRawFile
192
193} // namespace Internal
194} // namespace ROOT
195
196#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size.
std::uint64_t GetFilePos() const
Returns the offset for the next Read/Readln call.
Definition RRawFile.hxx:170
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition RRawFile.cxx:106
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition RRawFile.hxx:110
RRawFile & operator=(const RRawFile &)=delete
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition RRawFile.hxx:128
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations.
Definition RRawFile.cxx:99
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition RRawFile.cxx:128
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition RRawFile.hxx:92
std::uint64_t GetSize()
Returns the size of the file.
Definition RRawFile.cxx:114
bool fIsBuffering
Runtime switch to decide if reads are buffered or directly sent to ReadAtImpl()
Definition RRawFile.hxx:122
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition RRawFile.cxx:64
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition RRawFile.cxx:240
static constexpr std::uint64_t kUnknownFileSize
Used as a marker that the file size was not yet queried.
Definition RRawFile.hxx:116
virtual RIOVecLimits GetReadVLimits()
Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
Definition RRawFile.hxx:179
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition RRawFile.hxx:46
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition RRawFile.cxx:145
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition RRawFile.hxx:120
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
Definition RRawFile.cxx:207
RRawFile(const RRawFile &)=delete
void EnsureOpen()
Open the file if not already open. Otherwise noop.
Definition RRawFile.cxx:90
bool IsOpen() const
Once opened, the file stays open until destruction of the RRawFile object.
Definition RRawFile.hxx:190
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
std::unique_ptr< unsigned char[]> fBufferSpace
Memory block containing the block buffers consecutively.
Definition RRawFile.hxx:114
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition RRawFile.cxx:194
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition RRawFile.cxx:138
void SetBuffering(bool value)
Turn off buffered reads; all scalar read requests go directly to the implementation.
Definition RRawFile.cxx:200
std::string GetUrl() const
Returns the url of the file.
Definition RRawFile.cxx:124
std::uint64_t fFileSize
The cached file size.
Definition RRawFile.hxx:118
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file.
Definition RRawFile.hxx:112
virtual ~RRawFile()=default
TLine * line
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition RRawFile.hxx:95
RBlockBuffer(const RBlockBuffer &)=delete
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition RRawFile.hxx:99
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns the number of bytes copied.
Definition RRawFile.cxx:46
RBlockBuffer & operator=(const RBlockBuffer &)=delete
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition RRawFile.hxx:97
Implementations may enforce limits on the use of vector reads.
Definition RRawFile.hxx:75
std::size_t fMaxSingleSize
Maximum size in bytes of any single request in the request vector.
Definition RRawFile.hxx:79
std::uint64_t fMaxTotalSize
Maximum size in bytes of the sum of requests in the vector.
Definition RRawFile.hxx:81
std::size_t fMaxReqs
Maximum number of elements in a ReadV request vector.
Definition RRawFile.hxx:77
Used for vector reads from multiple offsets into multiple buffers.
Definition RRawFile.hxx:61
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition RRawFile.hxx:69
std::size_t fSize
The number of desired bytes.
Definition RRawFile.hxx:67
void * fBuffer
The destination for reading.
Definition RRawFile.hxx:63
std::uint64_t fOffset
The file offset.
Definition RRawFile.hxx:65
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition RRawFile.hxx:49
static constexpr size_t kUseDefaultBlockSize
Use protocol-dependent default block size.
Definition RRawFile.hxx:50
size_t fBlockSize
Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering.
Definition RRawFile.hxx:54