Logo ROOT  
Reference Guide
RRawFile.hxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_RRawFile
13#define ROOT_RRawFile
14
15#include <ROOT/RStringView.hxx>
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24
25/**
26 * \class RRawFile RRawFile.hxx
27 * \ingroup IO
28 *
29 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32 * RDataSource implementations and for RNTuple.
33 *
34 * Files are addressed by a URL consisting of a transport protocol part and a location, e.g. file:///path/to/data.
35 * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36 * opened when required (on reading, getting file size) and closed on object destruction.
37 *
38 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39 *
40 * RRawFile objects are conditionally thread safe. See the user manual for further details:
41 * https://root.cern/manual/thread_safety/
42 */
43class RRawFile {
44public:
45 /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
46 static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
47 /// kAuto detects the line break from the first line, kSystem picks the system's default
49
50 // Combination of flags provided by derived classes about the nature of the file
51 /// GetSize() does not return kUnknownFileSize
52 static constexpr int kFeatureHasSize = 0x01;
53 /// Map() and Unmap() are implemented
54 static constexpr int kFeatureHasMmap = 0x02;
55 /// File supports async IO
56 static constexpr int kFeatureHasAsyncIo = 0x04;
57
58 /// On construction, an ROptions parameter can customize the RRawFile behavior
59 struct ROptions {
61 /**
62 * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
63 * that the protocol-dependent default block size should be used.
64 */
65 int fBlockSize;
67 };
68
69 /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
70 /// single byte range from disk into multiple buffers.
71 struct RIOVec {
72 /// The destination for reading
73 void *fBuffer = nullptr;
74 /// The file offset
75 std::uint64_t fOffset = 0;
76 /// The number of desired bytes
77 std::size_t fSize = 0;
78 /// The number of actually read bytes, set by ReadV()
79 std::size_t fOutBytes = 0;
80 };
81
82private:
83 /// Don't change without adapting ReadAt()
84 static constexpr unsigned int kNumBlockBuffers = 2;
85 struct RBlockBuffer {
86 /// Where in the open file does fBuffer start
87 std::uint64_t fBufferOffset;
88 /// The number of currently buffered bytes in fBuffer
89 size_t fBufferSize;
90 /// Points into the I/O buffer with data from the file, not owned.
91 unsigned char *fBuffer;
92
94 RBlockBuffer(const RBlockBuffer &) = delete;
95 RBlockBuffer &operator=(const RBlockBuffer &) = delete;
96 ~RBlockBuffer() = default;
97
98 /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
99 size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
100 };
101 /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
102 unsigned int fBlockBufferIdx;
103 /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
104 RBlockBuffer fBlockBuffers[kNumBlockBuffers];
105 /// Memory block containing the block buffers consecutively
106 unsigned char *fBufferSpace;
107 /// The cached file size
108 std::uint64_t fFileSize;
109 /// Files are opened lazily and only when required; the open state is kept by this flag
110 bool fIsOpen;
111
112protected:
113 std::string fUrl;
115 /// The current position in the file, which can be changed by Seek, Read, and Readln
116 std::uint64_t fFilePos;
117
118 /**
119 * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
120 * fOptions.fBlockSize is negative, derived classes are responsible for setting a sensible value. After a call to
121 * OpenImpl(), fOptions.fBlockSize must be greater than or equal to zero.
122 */
123 virtual void OpenImpl() = 0;
124 /**
125 * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
126 * therefore derived classes should return nbytes bytes if available.
127 */
128 virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
129 /// Derived classes should return the file size or kUnknownFileSize
130 virtual std::uint64_t GetSizeImpl() = 0;
131
132 /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too.
133 /// The default implementation throws an error.
134 virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
135 /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
136 virtual void UnmapImpl(void *region, size_t nbytes);
137
138 /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations
139 virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
140
141public:
142 RRawFile(std::string_view url, ROptions options);
143 RRawFile(const RRawFile &) = delete;
144 RRawFile &operator=(const RRawFile &) = delete;
145 virtual ~RRawFile();
146
147 /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
148 virtual std::unique_ptr<RRawFile> Clone() const = 0;
149
150 /// Factory method that returns a suitable concrete implementation according to the transport in the url
151 static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
152 /// Returns only the file location, e.g. "server/file" for http://server/file
153 static std::string GetLocation(std::string_view url);
154 /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
155 static std::string GetTransport(std::string_view url);
156
157 /**
158 * Buffered read from a random position. Returns the actual number of bytes read.
159 * Short reads indicate the end of the file
160 */
161 size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
162 /// Read from fFilePos offset. Returns the actual number of bytes read.
163 size_t Read(void *buffer, size_t nbytes);
164 /// Change the cursor fFilePos
165 void Seek(std::uint64_t offset);
166 /// Returns the offset for the next Read/Readln call
167 std::uint64_t GetFilePos() const { return fFilePos; }
168 /// Returns the size of the file
169 std::uint64_t GetSize();
170 /// Returns the url of the file
171 std::string GetUrl() const;
172
173 /// Opens the file if necessary and calls ReadVImpl
174 void ReadV(RIOVec *ioVec, unsigned int nReq);
175
176 /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
177 /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
178 /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
179 void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
180 /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
181 void Unmap(void *region, size_t nbytes);
182
183 /// Derived classes shall inform the user about the supported functionality, which can possibly depend
184 /// on the file at hand
185 virtual int GetFeatures() const = 0;
186
187 /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
188 bool Readln(std::string &line);
189}; // class RRawFile
190
191} // namespace Internal
192} // namespace ROOT
193
194#endif
std::string kAuto
Definition: RColor.cxx:37
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:43
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:106
std::uint64_t GetFilePos() const
Returns the offset for the next Read/Readln call.
Definition: RRawFile.hxx:167
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:102
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
RRawFile & operator=(const RRawFile &)=delete
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition: RRawFile.hxx:116
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition: RRawFile.cxx:95
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition: RRawFile.hxx:54
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition: RRawFile.cxx:253
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX impleme...
Definition: RRawFile.cxx:101
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:136
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition: RRawFile.hxx:84
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:248
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:46
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition: RRawFile.hxx:48
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:161
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:110
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:215
RRawFile(const RRawFile &)=delete
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:207
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:52
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:154
static constexpr int kFeatureHasAsyncIo
File supports async IO.
Definition: RRawFile.hxx:56
std::string GetUrl() const
Returns the url of the file.
Definition: RRawFile.cxx:132
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:108
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition: RRawFile.hxx:104
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition: RRawFile.cxx:108
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:146
TLine * line
basic_string_view< char > string_view
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:87
RBlockBuffer(const RBlockBuffer &)=delete
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:91
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
RBlockBuffer & operator=(const RBlockBuffer &)=delete
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:89
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:71
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:79
std::size_t fSize
The number of desired bytes.
Definition: RRawFile.hxx:77
void * fBuffer
The destination for reading.
Definition: RRawFile.hxx:73
std::uint64_t fOffset
The file offset.
Definition: RRawFile.hxx:75
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:59
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition: RRawFile.hxx:65