Logo ROOT   master
Reference Guide
RRawFile.hxx
Go to the documentation of this file.
1 // @(#)root/io:$Id$
2 // Author: Jakob Blomer
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_RRawFile
13 #define ROOT_RRawFile
14 
15 #include <ROOT/RStringView.hxx>
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 
22 namespace ROOT {
23 namespace Internal {
24 
25 /**
26  * \class RRawFile RRawFile.hxx
27  * \ingroup IO
28  *
29  * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30  * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31  * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32  * RDataSource implementations and for RNTuple.
33  *
34  * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
35  * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36  * opened when required (on reading, getting file size) and closed on object destruction.
37  *
38  * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39  */
40 class RRawFile {
41 public:
42  /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
43  static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
44  /// kAuto detects the line break from the first line, kSystem picks the system's default
45  enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
46 
47  // Combination of flags provided by derived classes about the nature of the file
48  /// GetSize() does not return kUnknownFileSize
49  static constexpr int kFeatureHasSize = 0x01;
50  /// Map() and Unmap() are implemented
51  static constexpr int kFeatureHasMmap = 0x02;
52 
53  /// On construction, an ROptions parameter can customize the RRawFile behavior
54  struct ROptions {
56  /**
57  * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
58  * that the protocol-dependent default block size should be used.
59  */
62  };
63 
64  /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
65  /// single byte range from disk into multiple buffers.
66  struct RIOVec {
67  /// The destination for reading
68  void *fBuffer = nullptr;
69  /// The file offset
70  std::uint64_t fOffset = 0;
71  /// The number of desired bytes
72  std::size_t fSize = 0;
73  /// The number of actually read bytes, set by ReadV()
74  std::size_t fOutBytes = 0;
75  };
76 
77 private:
78  /// Don't change without adapting ReadAt()
79  static constexpr unsigned int kNumBlockBuffers = 2;
80  struct RBlockBuffer {
81  /// Where in the open file does fBuffer start
82  std::uint64_t fBufferOffset;
83  /// The number of currently buffered bytes in fBuffer
84  size_t fBufferSize;
85  /// Points into the I/O buffer with data from the file, not owned.
86  unsigned char *fBuffer;
87 
89  RBlockBuffer(const RBlockBuffer &) = delete; // block buffers cannot be copy-constructed
90  RBlockBuffer &operator=(const RBlockBuffer &) = delete; // block buffers cannot be copy-assigned
91  ~RBlockBuffer() = default;
92 
93  /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
94  size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
95  };
96  /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
97  unsigned int fBlockBufferIdx;
98  /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
100  /// Memory block containing the block buffers consecutively
101  unsigned char *fBufferSpace;
102  /// The cached file size
103  std::uint64_t fFileSize;
104  /// Files are opened lazily and only when required; the open state is kept by this flag
105  bool fIsOpen;
106 
107 protected:
108  std::string fUrl;
110  /// The current position in the file, which can be changed by Seek, Read, and Readln
111  std::uint64_t fFilePos;
112 
113  /**
114  * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If fOptions.fBlockSize
115  * is negative, derived classes are responsible to set a sensible value. After a call to OpenImpl(),
116  * fOptions.fBlockSize must be larger than or equal to zero.
117  */
118  virtual void OpenImpl() = 0;
119  /**
120  * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
121  * therefore derived classes should return nbytes bytes if available.
122  */
123  virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
124  /// Derived classes should return the file size or kUnknownFileSize
125  virtual std::uint64_t GetSizeImpl() = 0;
126 
127  /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
128  /// The default implementation throws an error
129  virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
130  /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
131  virtual void UnmapImpl(void *region, size_t nbytes);
132 
133  /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. by XRootD or DAVIX implementations
134  virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
135 
136 public:
137  RRawFile(std::string_view url, ROptions options);
138  RRawFile(const RRawFile &) = delete; // not copy-constructible; use Clone() to get a second handle
139  RRawFile &operator=(const RRawFile &) = delete; // not copy-assignable; use Clone() to get a second handle
140  virtual ~RRawFile();
141 
142  /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
143  virtual std::unique_ptr<RRawFile> Clone() const = 0;
144 
145  /// Factory method that returns a suitable concrete implementation according to the transport in the url
146  static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
147  /// Returns only the file location, e.g. "server/file" for http://server/file
148  static std::string GetLocation(std::string_view url);
149  /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
150  static std::string GetTransport(std::string_view url);
151 
152  /**
153  * Buffered read from a random position. Returns the actual number of bytes read.
154  * Short reads indicate the end of the file
155  */
156  size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
157  /// Read from fFilePos offset. Returns the actual number of bytes read.
158  size_t Read(void *buffer, size_t nbytes);
159  /// Change the cursor fFilePos
160  void Seek(std::uint64_t offset);
161  /// Returns the size of the file
162  std::uint64_t GetSize();
163 
164  /// Opens the file if necessary and calls ReadVImpl
165  void ReadV(RIOVec *ioVec, unsigned int nReq);
166 
167  /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
168  /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
169  /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
170  void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
171  /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
172  void Unmap(void *region, size_t nbytes);
173 
174  /// Derived classes shall inform the user about the supported functionality, which can possibly depend
175  /// on the file at hand
176  virtual int GetFeatures() const = 0;
177 
178  /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
179  bool Readln(std::string &line);
180 }; // class RRawFile
181 
182 } // namespace Internal
183 } // namespace ROOT
184 
185 #endif
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln. ...
Definition: RRawFile.hxx:111
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
Returns the available number of logical cores.
Definition: RNumpyDS.hxx:30
TLine * line
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:132
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RawFile that accesses the same resource. The file pointer is reset to zero...
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:49
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:84
void * fBuffer
The destination for reading.
Definition: RRawFile.hxx:68
std::size_t fSize
The number of desired bytes.
Definition: RRawFile.hxx:72
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:103
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:43
RBlockBuffer & operator=(const RBlockBuffer &)=delete
RRawFile & operator=(const RRawFile &)=delete
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:74
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:150
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default ...
Definition: RRawFile.hxx:45
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overwritten, e.g. XRootD or DAVIX impleme...
Definition: RRawFile.cxx:101
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:105
std::uint64_t fOffset
The file offset.
Definition: RRawFile.hxx:70
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:244
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:66
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping...
Definition: RRawFile.cxx:249
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:54
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition: RRawFile.hxx:79
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:211
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map() ...
Definition: RRawFile.cxx:108
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:142
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:101
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:157
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition: RRawFile.hxx:99
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url...
Definition: RRawFile.cxx:73
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:203
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:40
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented...
Definition: RRawFile.cxx:95
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition: RRawFile.hxx:51
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:97
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition: RRawFile.hxx:60
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:86
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:82