Logo ROOT  
Reference Guide
RRawFile.hxx
Go to the documentation of this file.
1 // @(#)root/io:$Id$
2 // Author: Jakob Blomer
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_RRawFile
13 #define ROOT_RRawFile
14 
15 #include <ROOT/RStringView.hxx>
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 
22 namespace ROOT {
23 namespace Internal {
24 
25 /**
26  * \class RRawFile RRawFile.hxx
27  * \ingroup IO
28  *
29  * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30  * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31  * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32  * RDataSource implementations and for RNTuple.
33  *
34  * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
35  * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36  * opened when required (on reading, getting file size) and closed on object destruction.
37  *
38  * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39  *
40  * RRawFile objects are conditionally thread safe. See the user manual for further details:
41  * https://root.cern/manual/thread_safety/
42  */
43 class RRawFile {
44 public:
45  /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
46  static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
47  /// kAuto detects the line break from the first line, kSystem picks the system's default
48  enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
49 
50  // Combination of flags provided by derived classes about the nature of the file
51  /// GetSize() does not return kUnknownFileSize
52  static constexpr int kFeatureHasSize = 0x01;
53  /// Map() and Unmap() are implemented
54  static constexpr int kFeatureHasMmap = 0x02;
55  /// File supports async IO
56  static constexpr int kFeatureHasAsyncIo = 0x04;
57 
58  /// On construction, an ROptions parameter can customize the RRawFile behavior
59  struct ROptions {
60  ELineBreaks fLineBreak;
61  /**
62  * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
63  * that the protocol-dependent default block size should be used.
64  */
65  int fBlockSize;
67  };
68 
69  /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
70  /// single byte range from disk into multiple buffers.
71  struct RIOVec {
72  /// The destination for reading
73  void *fBuffer = nullptr;
74  /// The file offset
75  std::uint64_t fOffset = 0;
76  /// The number of desired bytes
77  std::size_t fSize = 0;
78  /// The number of actually read bytes, set by ReadV()
79  std::size_t fOutBytes = 0;
80  };
81 
82 private:
83  /// Don't change without adapting ReadAt()
84  static constexpr unsigned int kNumBlockBuffers = 2;
85  struct RBlockBuffer {
86  /// Where in the open file does fBuffer start
87  std::uint64_t fBufferOffset;
88  /// The number of currently buffered bytes in fBuffer
89  size_t fBufferSize;
90  /// Points into the I/O buffer with data from the file, not owned.
91  unsigned char *fBuffer;
92 
94  RBlockBuffer(const RBlockBuffer &) = delete;
95  RBlockBuffer &operator=(const RBlockBuffer &) = delete;
96  ~RBlockBuffer() = default;
97 
98  /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
99  size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
100  };
101  /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
102  unsigned int fBlockBufferIdx;
103  /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
104  RBlockBuffer fBlockBuffers[kNumBlockBuffers];
105  /// Memory block containing the block buffers consecutively
106  unsigned char *fBufferSpace;
107  /// The cached file size
108  std::uint64_t fFileSize;
109  /// Files are opened lazily and only when required; the open state is kept by this flag
110  bool fIsOpen;
111 
112 protected:
113  std::string fUrl;
114  ROptions fOptions;
115  /// The current position in the file, which can be changed by Seek, Read, and Readln
116  std::uint64_t fFilePos;
117 
118  /**
119  * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
120  * fOptions.fBlockSize is negative, derived classes are responsible for setting a sensible value. After a call to
121  * OpenImpl(), fOptions.fBlockSize must be greater than or equal to zero.
122  */
123  virtual void OpenImpl() = 0;
124  /**
125  * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
126  * therefore derived classes should return nbytes bytes if available.
127  */
128  virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
129  /// Derived classes should return the file size or kUnknownFileSize
130  virtual std::uint64_t GetSizeImpl() = 0;
131 
132  /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too.
133  /// The default implementation throws an error.
134  virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
135  /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
136  virtual void UnmapImpl(void *region, size_t nbytes);
137 
138  /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. by the XRootD or DAVIX implementations
139  virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
140 
141 public:
142  RRawFile(std::string_view url, ROptions options);
143  RRawFile(const RRawFile &) = delete;
144  RRawFile &operator=(const RRawFile &) = delete;
145  virtual ~RRawFile();
146 
147  /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
148  virtual std::unique_ptr<RRawFile> Clone() const = 0;
149 
150  /// Factory method that returns a suitable concrete implementation according to the transport in the url
151  static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
152  /// Returns only the file location, e.g. "server/file" for http://server/file
153  static std::string GetLocation(std::string_view url);
154  /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
155  static std::string GetTransport(std::string_view url);
156 
157  /**
158  * Buffered read from a random position. Returns the actual number of bytes read.
159  * Short reads indicate the end of the file
160  */
161  size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
162  /// Read from fFilePos offset. Returns the actual number of bytes read.
163  size_t Read(void *buffer, size_t nbytes);
164  /// Change the cursor fFilePos
165  void Seek(std::uint64_t offset);
166  /// Returns the offset for the next Read/Readln call
167  std::uint64_t GetFilePos() const { return fFilePos; }
168  /// Returns the size of the file
169  std::uint64_t GetSize();
170  /// Returns the url of the file
171  std::string GetUrl() const;
172 
173  /// Opens the file if necessary and calls ReadVImpl
174  void ReadV(RIOVec *ioVec, unsigned int nReq);
175 
176  /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
177  /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
178  /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
179  void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
180  /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
181  void Unmap(void *region, size_t nbytes);
182 
183  /// Derived classes shall inform the user about the supported functionality, which can possibly depend
184  /// on the file at hand
185  virtual int GetFeatures() const = 0;
186 
187  /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
188  bool Readln(std::string &line);
189 }; // class RRawFile
190 
191 } // namespace Internal
192 } // namespace ROOT
193 
194 #endif
ROOT::Internal::RRawFile::ELineBreaks::kAuto
@ kAuto
ROOT::Internal::RRawFile::ROptions::fBlockSize
int fBlockSize
Read at least fBlockSize bytes at a time.
Definition: RRawFile.hxx:65
ROOT::Internal::RRawFile::RBlockBuffer::CopyTo
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
ROOT::Internal::RRawFile::fBufferSpace
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:106
ROOT::Internal::RRawFile::ReadVImpl
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX impleme...
Definition: RRawFile.cxx:101
ROOT::Internal::RRawFile::GetFeatures
virtual int GetFeatures() const =0
Derived classes shall inform the user about the supported functionality, which can possibly depend on...
ROOT::Internal::RRawFile::~RRawFile
virtual ~RRawFile()
Definition: RRawFile.cxx:67
ROOT::Internal::RRawFile::Create
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
ROOT::Internal::RRawFile::fBlockBuffers
RBlockBuffer fBlockBuffers[kNumBlockBuffers]
An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in ...
Definition: RRawFile.hxx:104
ROOT::Internal::RRawFile::ReadV
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:207
ROOT::Internal::RRawFile::operator=
RRawFile & operator=(const RRawFile &)=delete
ROOT::Internal::RRawFile::GetSize
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
ROOT::Internal::RRawFile::Seek
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:248
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::Internal::RRawFile::RRawFile
RRawFile(const RRawFile &)=delete
ROOT::Internal::RRawFile::OpenImpl
virtual void OpenImpl()=0
OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
ROOT::Internal::RRawFile::UnmapImpl
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition: RRawFile.cxx:108
ROOT::Internal::RRawFile::fBlockBufferIdx
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:102
ROOT::Internal::RRawFile
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:43
ROOT::Internal::RRawFile::kNumBlockBuffers
static constexpr unsigned int kNumBlockBuffers
Don't change without adapting ReadAt()
Definition: RRawFile.hxx:84
ROOT::Internal::RRawFile::GetFilePos
std::uint64_t GetFilePos() const
Returns the offset for the next Read/Readln call.
Definition: RRawFile.hxx:167
ROOT::Internal::RRawFile::RBlockBuffer::operator=
RBlockBuffer & operator=(const RBlockBuffer &)=delete
ROOT::Internal::RRawFile::fFilePos
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition: RRawFile.hxx:116
ROOT::Internal::RRawFile::ELineBreaks::kWindows
@ kWindows
ROOT::Internal::RRawFile::RIOVec::fOffset
std::uint64_t fOffset
The file offset.
Definition: RRawFile.hxx:75
ROOT::Internal::RRawFile::ELineBreaks::kSystem
@ kSystem
ROOT::Internal::RRawFile::GetLocation
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
ROOT::Internal::RRawFile::MapImpl
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition: RRawFile.cxx:95
ROOT::Internal::RRawFile::Readln
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:215
ROOT::Internal::RRawFile::kFeatureHasAsyncIo
static constexpr int kFeatureHasAsyncIo
File supports async IO.
Definition: RRawFile.hxx:56
ROOT::Internal::RRawFile::kFeatureHasMmap
static constexpr int kFeatureHasMmap
Map() and Unmap() are implemented.
Definition: RRawFile.hxx:54
ROOT::Internal::RRawFile::RIOVec
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:71
ROOT::Internal::RRawFile::fUrl
std::string fUrl
Definition: RRawFile.hxx:113
ROOT::Internal::RRawFile::kUnknownFileSize
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:46
ROOT::Internal::RRawFile::RIOVec::fOutBytes
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:79
ROOT::Internal::RRawFile::fFileSize
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:108
ROOT::Internal::RRawFile::RBlockBuffer
Definition: RRawFile.hxx:85
ROOT::Internal::RRawFile::GetUrl
std::string GetUrl() const
Returns the url of the file.
Definition: RRawFile.cxx:132
RStringView.hxx
ROOT::Internal::RRawFile::fOptions
ROptions fOptions
Definition: RRawFile.hxx:114
ROOT::Internal::RRawFile::RBlockBuffer::fBufferOffset
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:87
ROOT::Internal::RRawFile::ROptions::ROptions
ROptions()
Definition: RRawFile.hxx:66
ROOT::Internal::RRawFile::ROptions::fLineBreak
ELineBreaks fLineBreak
Definition: RRawFile.hxx:60
ROOT::Internal::RRawFile::RRawFile
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
ROOT::Internal::RRawFile::RBlockBuffer::fBufferSize
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:89
line
TLine * line
Definition: entrylistblock_figure1.C:235
ROOT::Internal::RRawFile::fIsOpen
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:110
ROOT::Internal::RRawFile::RBlockBuffer::fBuffer
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:91
ROOT::Internal::RRawFile::ReadAtImpl
virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset)=0
Derived classes should implement low-level reading without buffering.
ROOT::Internal::RRawFile::RBlockBuffer::~RBlockBuffer
~RBlockBuffer()=default
ROOT::Internal::RRawFile::ELineBreaks
ELineBreaks
kAuto detects the line break from the first line, kSystem picks the system's default
Definition: RRawFile.hxx:48
ROOT::Internal::RRawFile::GetTransport
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:136
ROOT::Internal::RRawFile::Clone
virtual std::unique_ptr< RRawFile > Clone() const =0
Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
ROOT::Internal::RRawFile::ROptions
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:59
ROOT::Internal::RRawFile::RBlockBuffer::RBlockBuffer
RBlockBuffer()
Definition: RRawFile.hxx:93
ROOT::Internal::RRawFile::ELineBreaks::kUnix
@ kUnix
ROOT::Internal::RRawFile::Read
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:154
ROOT::Internal::RRawFile::Unmap
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition: RRawFile.cxx:253
ROOT::Internal::RRawFile::RIOVec::fSize
std::size_t fSize
The number of desired bytes.
Definition: RRawFile.hxx:77
ROOT::Internal::RRawFile::GetSizeImpl
virtual std::uint64_t GetSizeImpl()=0
Derived classes should return the file size or kUnknownFileSize.
ROOT::Internal::RRawFile::kFeatureHasSize
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:52
ROOT::Internal::RRawFile::RBlockBuffer::RBlockBuffer
RBlockBuffer(const RBlockBuffer &)=delete
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::Internal::RRawFile::ReadAt
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:161
ROOT::Internal::RRawFile::Map
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:146
ROOT::Internal::RRawFile::RIOVec::fBuffer
void * fBuffer
The destination for reading.
Definition: RRawFile.hxx:73