Logo ROOT   master
Reference Guide
RRawFile.cxx
Go to the documentation of this file.
1 // @(#)root/io:$Id$
2 // Author: Jakob Blomer
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #include <ROOT/RConfig.h>
13 #include <ROOT/RRawFile.hxx>
14 #ifdef _WIN32
15 #include <ROOT/RRawFileWin.hxx>
16 #else
17 #include <ROOT/RRawFileUnix.hxx>
18 #endif
19 
20 #include "TError.h"
21 #include "TPluginManager.h"
22 #include "TROOT.h"
23 
24 #include <algorithm>
25 #include <cctype> // for tolower
26 #include <cerrno>
27 #include <cstddef>
28 #include <cstdint>
29 #include <cstring>
30 #include <stdexcept>
31 #include <string>
32 
// File-local constants used by the URL helpers and by Readln.
33 namespace {
// Marker between the transport protocol and the location in a URL (searched by GetTransport and GetLocation)
34 const char *kTransportSeparator = "://";
35 // Corresponds to ELineBreaks
// Both tables are indexed with static_cast<int>(fOptions.fLineBreak) in Readln.
// NOTE(review): the entries look like {kAuto, kSystem, kUnix, kWindows}; confirm the enumerator order in RRawFile.hxx.
// Only the second entry differs between the two branches: "\r\n" on Windows, "\n" elsewhere.
36 #ifdef _WIN32
37 const char *kLineBreakTokens[] = {"", "\r\n", "\n", "\r\n"};
// Length in bytes of the corresponding entry in kLineBreakTokens
38 constexpr unsigned int kLineBreakTokenSizes[] = {0, 2, 1, 2};
39 #else
40 const char *kLineBreakTokens[] = {"", "\n", "\n", "\r\n"};
// Length in bytes of the corresponding entry in kLineBreakTokens
41 constexpr unsigned int kLineBreakTokenSizes[] = {0, 1, 1, 2};
42 #endif
43 constexpr unsigned int kLineBuffer = 128; // On Readln, look for line-breaks in chunks of 128 bytes
44 } // anonymous namespace
45 
46 size_t ROOT::Internal::RRawFile::RBlockBuffer::CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
47 {
48  if (offset < fBufferOffset)
49  return 0;
50 
51  size_t copiedBytes = 0;
52  std::uint64_t offsetInBuffer = offset - fBufferOffset;
53  if (offsetInBuffer < static_cast<std::uint64_t>(fBufferSize)) {
54  size_t bytesInBuffer = std::min(nbytes, static_cast<size_t>(fBufferSize - offsetInBuffer));
55  memcpy(buffer, fBuffer + offsetInBuffer, bytesInBuffer);
56  copiedBytes = bytesInBuffer;
57  }
58  return copiedBytes;
59 }
60 
61 ROOT::Internal::RRawFile::RRawFile(std::string_view url, ROptions options)
62  : fBlockBufferIdx(0), fBufferSpace(nullptr), fFileSize(kUnknownFileSize), fIsOpen(false), fUrl(url),
63  fOptions(options), fFilePos(0)
64 {
65 }
66 
68 {
69  delete[] fBufferSpace;
70 }
71 
72 std::unique_ptr<ROOT::Internal::RRawFile>
73 ROOT::Internal::RRawFile::Create(std::string_view url, ROptions options)
74 {
75  std::string transport = GetTransport(url);
76  if (transport == "file") {
77 #ifdef _WIN32
78  return std::unique_ptr<RRawFile>(new RRawFileWin(url, options));
79 #else
80  return std::unique_ptr<RRawFile>(new RRawFileUnix(url, options));
81 #endif
82  }
83  if (transport == "http" || transport == "https") {
84  if (TPluginHandler *h = gROOT->GetPluginManager()->FindHandler("ROOT::Internal::RRawFile")) {
85  if (h->LoadPlugin() == 0) {
86  return std::unique_ptr<RRawFile>(reinterpret_cast<RRawFile *>(h->ExecPlugin(2, &url, &options)));
87  }
88  throw std::runtime_error("Cannot load plugin handler for RRawFileDavix");
89  }
90  throw std::runtime_error("Cannot find plugin handler for RRawFileDavix");
91  }
92  throw std::runtime_error("Unsupported transport protocol: " + transport);
93 }
94 
/// Base-class implementation of memory mapping: ignores all arguments and always throws
/// std::runtime_error("Memory mapping unsupported").
95 void *ROOT::Internal::RRawFile::MapImpl(size_t /* nbytes */, std::uint64_t /* offset */,
96  std::uint64_t& /* mapdOffset */)
97 {
98  throw std::runtime_error("Memory mapping unsupported");
99 }
100 
101 void ROOT::Internal::RRawFile::ReadVImpl(RIOVec *ioVec, unsigned int nReq)
102 {
103  for (unsigned i = 0; i < nReq; ++i) {
104  ioVec[i].fOutBytes = ReadAt(ioVec[i].fBuffer, ioVec[i].fSize, ioVec[i].fOffset);
105  }
106 }
107 
/// Base-class implementation of unmapping: ignores all arguments and always throws
/// std::runtime_error("Memory mapping unsupported").
108 void ROOT::Internal::RRawFile::UnmapImpl(void * /* region */, size_t /* nbytes */)
109 {
110  throw std::runtime_error("Memory mapping unsupported");
111 }
112 
113 std::string ROOT::Internal::RRawFile::GetLocation(std::string_view url)
114 {
115  auto idx = url.find(kTransportSeparator);
116  if (idx == std::string_view::npos)
117  return std::string(url);
118  return std::string(url.substr(idx + strlen(kTransportSeparator)));
119 }
120 
122 {
123  if (!fIsOpen)
124  OpenImpl();
125  fIsOpen = true;
126 
127  if (fFileSize == kUnknownFileSize)
128  fFileSize = GetSizeImpl();
129  return fFileSize;
130 }
131 
132 std::string ROOT::Internal::RRawFile::GetTransport(std::string_view url)
133 {
134  auto idx = url.find(kTransportSeparator);
135  if (idx == std::string_view::npos)
136  return "file";
137  std::string transport(url.substr(0, idx));
138  std::transform(transport.begin(), transport.end(), transport.begin(), ::tolower);
139  return transport;
140 }
141 
142 void *ROOT::Internal::RRawFile::Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
143 {
144  if (!fIsOpen)
145  OpenImpl();
146  fIsOpen = true;
147  return MapImpl(nbytes, offset, mapdOffset);
148 }
149 
150 size_t ROOT::Internal::RRawFile::Read(void *buffer, size_t nbytes)
151 {
152  size_t res = ReadAt(buffer, nbytes, fFilePos);
153  fFilePos += res;
154  return res;
155 }
156 
/// Reads up to nbytes bytes starting at file position offset into buffer, opening the file first if necessary.
/// Requests larger than fOptions.fBlockSize go straight to ReadAtImpl. Smaller requests are served from the
/// block buffers where possible; the remainder is served by reading a full block, starting at the first byte
/// not found in a buffer, into the next block buffer.
/// \return The number of bytes stored in buffer; this is less than nbytes if ReadAtImpl delivers fewer
/// bytes than are still needed
157 size_t ROOT::Internal::RRawFile::ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
158 {
159  if (!fIsOpen)
160  OpenImpl();
161  R__ASSERT(fOptions.fBlockSize >= 0);
162  fIsOpen = true;
163 
164  // "Large" reads are served directly, bypassing the cache
165  if (nbytes > static_cast<unsigned int>(fOptions.fBlockSize))
166  return ReadAtImpl(buffer, nbytes, offset);
167 
 // First buffered read: allocate one contiguous area for all block buffers and
 // let each block buffer point to its own slice of fBlockSize bytes
168  if (fBufferSpace == nullptr) {
169  fBufferSpace = new unsigned char[kNumBlockBuffers * fOptions.fBlockSize];
170  for (unsigned int i = 0; i < kNumBlockBuffers; ++i)
171  fBlockBuffers[i].fBuffer = fBufferSpace + i * fOptions.fBlockSize;
172  }
173 
174  size_t totalBytes = 0;
175  size_t copiedBytes = 0;
176  /// Try to serve as many bytes as possible from the block buffers
 // The scan starts at the most recently used buffer. Note that the loop bound depends on fBlockBufferIdx,
 // which is reassigned inside the loop whenever a buffer contributed bytes.
177  for (unsigned int idx = fBlockBufferIdx; idx < fBlockBufferIdx + kNumBlockBuffers; ++idx) {
178  copiedBytes = fBlockBuffers[idx % kNumBlockBuffers].CopyTo(buffer, nbytes, offset);
 // Advance the destination and the file position past the bytes just served
179  buffer = reinterpret_cast<unsigned char *>(buffer) + copiedBytes;
180  nbytes -= copiedBytes;
181  offset += copiedBytes;
182  totalBytes += copiedBytes;
183  if (copiedBytes > 0)
184  fBlockBufferIdx = idx;
185  if (nbytes == 0)
186  return totalBytes;
187  }
188  fBlockBufferIdx++;
189 
190  /// The request was not fully satisfied and fBlockBufferIdx now points to the previous shadow buffer
191 
192  /// The remaining bytes populate the newly promoted main buffer
193  RBlockBuffer *thisBuffer = &fBlockBuffers[fBlockBufferIdx % kNumBlockBuffers];
 // Always fetch a full block; res is the number of bytes ReadAtImpl actually delivered
194  size_t res = ReadAtImpl(thisBuffer->fBuffer, fOptions.fBlockSize, offset);
195  thisBuffer->fBufferOffset = offset;
196  thisBuffer->fBufferSize = res;
 // Hand out no more than what was requested and no more than what was read
197  size_t remainingBytes = std::min(res, nbytes);
198  memcpy(buffer, thisBuffer->fBuffer, remainingBytes);
199  totalBytes += remainingBytes;
200  return totalBytes;
201 }
202 
203 void ROOT::Internal::RRawFile::ReadV(RIOVec *ioVec, unsigned int nReq)
204 {
205  if (!fIsOpen)
206  OpenImpl();
207  fIsOpen = true;
208  ReadVImpl(ioVec, nReq);
209 }
210 
212 {
213  if (fOptions.fLineBreak == ELineBreaks::kAuto) {
214  // Auto-detect line breaks according to the break discovered in the first line
215  fOptions.fLineBreak = ELineBreaks::kUnix;
216  bool res = Readln(line);
217  if ((line.length() > 0) && (*line.rbegin() == '\r')) {
218  fOptions.fLineBreak = ELineBreaks::kWindows;
219  line.resize(line.length() - 1);
220  }
221  return res;
222  }
223 
224  line.clear();
225  char buffer[kLineBuffer];
226  size_t nbytes;
227  do {
228  nbytes = Read(buffer, sizeof(buffer));
229  std::string_view bufferView(buffer, nbytes);
230  auto idx = bufferView.find(kLineBreakTokens[static_cast<int>(fOptions.fLineBreak)]);
231  if (idx != std::string_view::npos) {
232  // Line break found, return the string and skip the linebreak itself
233  line.append(buffer, idx);
234  fFilePos -= nbytes - idx;
235  fFilePos += kLineBreakTokenSizes[static_cast<int>(fOptions.fLineBreak)];
236  return true;
237  }
238  line.append(buffer, nbytes);
239  } while (nbytes > 0);
240 
241  return !line.empty();
242 }
243 
/// Sets the file position used by Read and Readln to offset.
/// Only the cursor fFilePos is updated; nothing is opened or read here and offset is not validated.
244 void ROOT::Internal::RRawFile::Seek(std::uint64_t offset)
245 {
246  fFilePos = offset;
247 }
248 
/// Forwards the unmap request for region/nbytes to UnmapImpl.
/// Unlike Map, this never opens the file: calling it on a file that is not open throws std::runtime_error.
249 void ROOT::Internal::RRawFile::Unmap(void *region, size_t nbytes)
250 {
251  if (!fIsOpen)
252  throw std::runtime_error("Cannot unmap, file not open");
253  UnmapImpl(region, nbytes);
254 }
auto MapImpl(F &&f, const RVec< T > &... vs) -> RVec< decltype(f(vs[0]...))>
Definition: RVec.hxx:82
The RRawFileUnix class uses POSIX calls to read from a mounted file system.
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln. ...
Definition: RRawFile.hxx:111
TLine * line
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:132
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:84
#define R__ASSERT(e)
Definition: TError.h:96
#define gROOT
Definition: TROOT.h:405
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:103
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:43
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:74
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:150
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overwritten, e.g. XRootD or DAVIX impleme...
Definition: RRawFile.cxx:101
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:105
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:244
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:66
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping...
Definition: RRawFile.cxx:249
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:54
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file ...
Definition: RRawFile.cxx:211
The RRawFileWin class uses portable C I/O calls to read from a drive.
Definition: RRawFileWin.hxx:32
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map() ...
Definition: RRawFile.cxx:108
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:142
#define h(i)
Definition: RSha256.hxx:106
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copie...
Definition: RRawFile.cxx:46
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:101
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:157
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url...
Definition: RRawFile.cxx:73
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:203
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:40
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented...
Definition: RRawFile.cxx:95
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:97
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:86
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:82