Logo ROOT  
Reference Guide
RRawFile.hxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_RRawFile
13#define ROOT_RRawFile
14
15#include <ROOT/RStringView.hxx>
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24
25/**
26 * \class RRawFile RRawFile.hxx
27 * \ingroup IO
28 *
29 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32 * RDataSource implementations and for RNTuple.
33 *
34 * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data.
35 * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36 * opened when required (on reading, getting file size) and closed on object destruction.
37 *
38 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39 */
40class RRawFile {
41public:
42 /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
43 static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
44 /// kAuto detects the line break from the first line, kSystem picks the system's default
45 enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
46
47 // Combination of flags provided by derived classes about the nature of the file
48 /// GetSize() does not return kUnknownFileSize
49 static constexpr int kFeatureHasSize = 0x01;
50 /// Map() and Unmap() are implemented
51 static constexpr int kFeatureHasMmap = 0x02;
52
53 /// On construction, an ROptions parameter can customize the RRawFile behavior
54 struct ROptions {
56 /**
57 * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
58 * that the protocol-dependent default block size should be used. (Documents the `int fBlockSize` member, whose
58 * declaration is elided from this listing.)
59 */
62 };
63
64 /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
65 /// single byte range from disk into multiple buffers.
66 struct RIOVec {
67 /// The destination for reading
68 void *fBuffer = nullptr;
69 /// The file offset
70 std::uint64_t fOffset = 0;
71 /// The number of desired bytes
72 std::size_t fSize = 0;
73 /// The number of actually read bytes, set by ReadV()
74 std::size_t fOutBytes = 0;
75 };
76
77private:
78 /// Don't change without adapting ReadAt()
79 static constexpr unsigned int kNumBlockBuffers = 2;
80 struct RBlockBuffer {
81 /// Where in the open file does fBuffer start
82 std::uint64_t fBufferOffset;
83 /// The number of currently buffered bytes in fBuffer (documents `size_t fBufferSize`; declaration elided from this listing)
85 /// Points into the I/O buffer with data from the file, not owned.
86 unsigned char *fBuffer;
87
89 RBlockBuffer(const RBlockBuffer &) = delete;
91 ~RBlockBuffer() = default;
92
93 /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
94 size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
95 };
96 /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
97 unsigned int fBlockBufferIdx;
98 /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
98 /// (documents `RBlockBuffer fBlockBuffers[kNumBlockBuffers]`; declaration elided from this listing)
100 /// Memory block containing the block buffers consecutively
101 unsigned char *fBufferSpace;
102 /// The cached file size
103 std::uint64_t fFileSize;
104 /// Files are opened lazily and only when required; the open state is kept by this flag
104 /// (documents `bool fIsOpen`; declaration elided from this listing)
106
107protected:
108 std::string fUrl;
110 /// The current position in the file, which can be changed by Seek, Read, and Readln
111 std::uint64_t fFilePos;
112
113 /**
114 * OpenImpl() is called at most once and before any call to either ReadAtImpl or GetSizeImpl. If fOptions.fBlockSize
115 * is negative, derived classes are responsible to set a sensible value. After a call to OpenImpl(),
116 * fOptions.fBlockSize must be larger or equal to zero.
117 */
118 virtual void OpenImpl() = 0;
119 /**
120 * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
121 * therefore derived classes should return nbytes bytes if available.
122 */
123 virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
124 /// Derived classes should return the file size or kUnknownFileSize
125 virtual std::uint64_t GetSizeImpl() = 0;
126
127 /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
128 /// The default implementation throws an error
129 virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
130 /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
131 virtual void UnmapImpl(void *region, size_t nbytes);
132
133 /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations
134 virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
135
136public:
137 RRawFile(std::string_view url, ROptions options);
138 RRawFile(const RRawFile &) = delete;
139 RRawFile &operator=(const RRawFile &) = delete;
140 virtual ~RRawFile();
141
142 /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
143 virtual std::unique_ptr<RRawFile> Clone() const = 0;
144
145 /// Factory method that returns a suitable concrete implementation according to the transport in the url
146 static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
147 /// Returns only the file location, e.g. "server/file" for http://server/file
148 static std::string GetLocation(std::string_view url);
149 /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
150 static std::string GetTransport(std::string_view url);
151
152 /**
153 * Buffered read from a random position. Returns the actual number of bytes read.
154 * Short reads indicate the end of the file.
155 */
156 size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
157 /// Read from fFilePos offset. Returns the actual number of bytes read.
158 size_t Read(void *buffer, size_t nbytes);
159 /// Change the cursor fFilePos
160 void Seek(std::uint64_t offset);
161 /// Returns the size of the file
162 std::uint64_t GetSize();
163
164 /// Opens the file if necessary and calls ReadVImpl
165 void ReadV(RIOVec *ioVec, unsigned int nReq);
166
167 /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
168 /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
169 /// Users become owners of the address returned by Map() and are responsible for calling Unmap() with the full length.
170 void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
171 /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
172 void Unmap(void *region, size_t nbytes);
173
174 /// Derived classes shall inform the user about the supported functionality, which can possibly depend
175 /// on the file at hand
176 virtual int GetFeatures() const = 0;
177
178 /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
179 bool Readln(std::string &line);
180}; // class RRawFile
181
182} // namespace Internal
183} // namespace ROOT
184
185#endif
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:40
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:101
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:97
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
RRawFile & operator=(const RRawFile &)=delete
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition: RRawFile.hxx:111
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition: RRawFile.cxx:95
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition: RRawFile.hxx:51
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition: RRawFile.cxx:249
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX impleme...
Definition: RRawFile.cxx:101
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:132
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition: RRawFile.hxx:79
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:244
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:43
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition: RRawFile.hxx:45
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:157
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:105
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:211
RRawFile(const RRawFile &)=delete
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:203
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:49
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl or GetSizeImpl.
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:150
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:103
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition: RRawFile.hxx:99
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition: RRawFile.cxx:108
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:142
TLine * line
basic_string_view< char > string_view
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:82
RBlockBuffer(const RBlockBuffer &)=delete
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:86
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
RBlockBuffer & operator=(const RBlockBuffer &)=delete
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:84
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:66
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:74
std::size_t fSize
The number of desired bytes.
Definition: RRawFile.hxx:72
void * fBuffer
The destination for reading.
Definition: RRawFile.hxx:68
std::uint64_t fOffset
The file offset.
Definition: RRawFile.hxx:70
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:54
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition: RRawFile.hxx:60