Logo ROOT  
Reference Guide
RRawFile.hxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_RRawFile
13#define ROOT_RRawFile
14
15#include <ROOT/RStringView.hxx>
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21
22namespace ROOT {
23namespace Internal {
24
25/**
26 * \class RRawFile RRawFile.hxx
27 * \ingroup IO
28 *
29 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32 * RDataSource implementations and for RNTuple.
33 *
34 * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
35 * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36 * opened when required (on reading, getting file size) and closed on object destruction.
37 *
38 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39 */
40class RRawFile {
41public:
42 /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
43 static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
44 /// kAuto detects the line break from the first line, kSystem picks the system's default
45 enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
46
47 // Combination of flags provided by derived classes about the nature of the file
48 /// GetSize() does not return kUnknownFileSize
49 static constexpr int kFeatureHasSize = 0x01;
50 /// Map() and Unmap() are implemented
51 static constexpr int kFeatureHasMmap = 0x02;
52
53 /// On construction, an ROptions parameter can customize the RRawFile behavior
54 struct ROptions {
56 /**
57 * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
58 * that the protocol-dependent default block size should be used.
59 */
62 };
63
64private:
65 /// Don't change without adapting ReadAt()
66 static constexpr unsigned int kNumBlockBuffers = 2;
67 struct RBlockBuffer {
68 /// Where in the open file does fBuffer start
69 std::uint64_t fBufferOffset;
70 /// The number of currently buffered bytes in fBuffer
72 /// Points into the I/O buffer with data from the file, not owned.
73 unsigned char *fBuffer;
74
76 RBlockBuffer(const RBlockBuffer &) = delete;
78 ~RBlockBuffer() = default;
79
80 /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
81 size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
82 };
83 /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
84 unsigned int fBlockBufferIdx;
85 /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
87 /// Memory block containing the block buffers consecutively
88 unsigned char *fBufferSpace;
89 /// The cached file size
90 std::uint64_t fFileSize;
91 /// Files are opened lazily and only when required; the open state is kept by this flag
92 bool fIsOpen;
93
94protected:
95 std::string fUrl;
97 /// The current position in the file, which can be changed by Seek, Read, and Readln
98 std::uint64_t fFilePos;
99
100 /**
101 * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
102 * fOptions.fBlockSize is negative, derived classes are responsible to set a sensible value. After a call to
103 * OpenImpl(), fOptions.fBlockSize must be greater than or equal to zero.
104 */
105 virtual void OpenImpl() = 0;
106 /**
107 * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
108 * therefore derived classes should return nbytes bytes if available.
109 */
110 virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
111 /// Derived classes should return the file size or kUnknownFileSize
112 virtual std::uint64_t GetSizeImpl() = 0;
113
114 /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
115 /// The default implementation throws an error
116 virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
117 /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
118 virtual void UnmapImpl(void *region, size_t nbytes);
119
120public:
121 RRawFile(std::string_view url, ROptions options);
122 RRawFile(const RRawFile &) = delete;
123 RRawFile &operator=(const RRawFile &) = delete;
124 virtual ~RRawFile();
125
126 /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
127 virtual std::unique_ptr<RRawFile> Clone() const = 0;
128
129 /// Factory method that returns a suitable concrete implementation according to the transport in the url
130 static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
131 /// Returns only the file location, e.g. "server/file" for http://server/file
132 static std::string GetLocation(std::string_view url);
133 /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
134 static std::string GetTransport(std::string_view url);
135
136 /**
137 * Buffered read from a random position. Returns the actual number of bytes read.
138 * Short reads indicate the end of the file.
139 */
140 size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
141 /// Read from fFilePos offset. Returns the actual number of bytes read.
142 size_t Read(void *buffer, size_t nbytes);
143 /// Change the cursor fFilePos
144 void Seek(std::uint64_t offset);
145 /// Returns the size of the file
146 std::uint64_t GetSize();
147
148 /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
149 /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
150 /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
151 void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
152 /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
153 void Unmap(void *region, size_t nbytes);
154
155 /// Derived classes shall inform the user about the supported functionality, which can possibly depend
156 /// on the file at hand
157 virtual int GetFeatures() const = 0;
158
159 /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
160 bool Readln(std::string &line);
161}; // class RRawFile
162
163} // namespace Internal
164} // namespace ROOT
165
166#endif
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:40
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:88
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:106
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:84
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
RRawFile & operator=(const RRawFile &)=delete
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition: RRawFile.hxx:98
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition: RRawFile.cxx:95
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition: RRawFile.hxx:51
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition: RRawFile.cxx:234
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:125
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition: RRawFile.hxx:66
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:114
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:229
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:43
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition: RRawFile.hxx:45
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:150
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:92
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:196
RRawFile(const RRawFile &)=delete
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:49
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl or GetSizeImpl.
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:143
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:90
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition: RRawFile.hxx:86
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition: RRawFile.cxx:101
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:135
TLine * line
basic_string_view< char > string_view
VSD Structures.
Definition: StringConv.hxx:21
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:69
RBlockBuffer(const RBlockBuffer &)=delete
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:73
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
RBlockBuffer & operator=(const RBlockBuffer &)=delete
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:71
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:54
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition: RRawFile.hxx:60