Logo ROOT  
Reference Guide
RRawFile.cxx
Go to the documentation of this file.
1// @(#)root/io:$Id$
2// Author: Jakob Blomer
3
4/*************************************************************************
5 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#include <ROOT/RConfig.h>
13#include <ROOT/RRawFile.hxx>
14#ifdef _WIN32
15#include <ROOT/RRawFileWin.hxx>
16#else
17#include <ROOT/RRawFileUnix.hxx>
18#endif
19
20#include "TError.h"
21#include "TPluginManager.h"
22#include "TROOT.h"
23
24#include <algorithm>
25#include <cctype> // for tolower
26#include <cerrno>
27#include <cstddef>
28#include <cstdint>
29#include <cstring>
30#include <stdexcept>
31#include <string>
32
namespace {
/// Token that separates the transport protocol from the location in a URL
constexpr const char *kTransportSeparator = "://";
// Corresponds to ELineBreaks: both tables are indexed by the integer value of the line break option
#ifdef _WIN32
constexpr const char *kLineBreakTokens[] = {"", "\r\n", "\n", "\r\n"};
constexpr unsigned int kLineBreakTokenSizes[] = {0, 2, 1, 2};
#else
constexpr const char *kLineBreakTokens[] = {"", "\n", "\n", "\r\n"};
constexpr unsigned int kLineBreakTokenSizes[] = {0, 1, 1, 2};
#endif
// The two tables are used side by side, so they must never get out of sync
static_assert(sizeof(kLineBreakTokens) / sizeof(kLineBreakTokens[0]) ==
                 sizeof(kLineBreakTokenSizes) / sizeof(kLineBreakTokenSizes[0]),
              "every line-break token needs a matching size entry");
constexpr unsigned int kLineBuffer = 128; // On Readln, look for line-breaks in chunks of 128 bytes
} // anonymous namespace
45
46size_t ROOT::Internal::RRawFile::RBlockBuffer::CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
47{
48 if (offset < fBufferOffset)
49 return 0;
50
51 size_t copiedBytes = 0;
52 std::uint64_t offsetInBuffer = offset - fBufferOffset;
53 if (offsetInBuffer < static_cast<std::uint64_t>(fBufferSize)) {
54 size_t bytesInBuffer = std::min(nbytes, static_cast<size_t>(fBufferSize - offsetInBuffer));
55 memcpy(buffer, fBuffer + offsetInBuffer, bytesInBuffer);
56 copiedBytes = bytesInBuffer;
57 }
58 return copiedBytes;
59}
60
63 fOptions(options), fFilePos(0)
64{
65}
66
68{
69 delete[] fBufferSpace;
70}
71
72std::unique_ptr<ROOT::Internal::RRawFile>
74{
75 std::string transport = GetTransport(url);
76 if (transport == "file") {
77#ifdef _WIN32
78 return std::unique_ptr<RRawFile>(new RRawFileWin(url, options));
79#else
80 return std::unique_ptr<RRawFile>(new RRawFileUnix(url, options));
81#endif
82 }
83 if (transport == "http" || transport == "https") {
84 if (TPluginHandler *h = gROOT->GetPluginManager()->FindHandler("ROOT::Internal::RRawFile")) {
85 if (h->LoadPlugin() == 0) {
86 return std::unique_ptr<RRawFile>(reinterpret_cast<RRawFile *>(h->ExecPlugin(2, &url, &options)));
87 }
88 throw std::runtime_error("Cannot load plugin handler for RRawFileDavix");
89 }
90 throw std::runtime_error("Cannot find plugin handler for RRawFileDavix");
91 }
92 throw std::runtime_error("Unsupported transport protocol: " + transport);
93}
94
95void *ROOT::Internal::RRawFile::MapImpl(size_t /* nbytes */, std::uint64_t /* offset */,
96 std::uint64_t& /* mapdOffset */)
97{
98 throw std::runtime_error("Memory mapping unsupported");
99}
100
101void ROOT::Internal::RRawFile::ReadVImpl(RIOVec *ioVec, unsigned int nReq)
102{
103 for (unsigned i = 0; i < nReq; ++i) {
104 ioVec[i].fOutBytes = ReadAt(ioVec[i].fBuffer, ioVec[i].fSize, ioVec[i].fOffset);
105 }
106}
107
108void ROOT::Internal::RRawFile::UnmapImpl(void * /* region */, size_t /* nbytes */)
109{
110 throw std::runtime_error("Memory mapping unsupported");
111}
112
114{
115 auto idx = url.find(kTransportSeparator);
116 if (idx == std::string_view::npos)
117 return std::string(url);
118 return std::string(url.substr(idx + strlen(kTransportSeparator)));
119}
120
122{
123 if (!fIsOpen)
124 OpenImpl();
125 fIsOpen = true;
126
127 if (fFileSize == kUnknownFileSize)
128 fFileSize = GetSizeImpl();
129 return fFileSize;
130}
131
133 return fUrl;
134}
135
137{
138 auto idx = url.find(kTransportSeparator);
139 if (idx == std::string_view::npos)
140 return "file";
141 std::string transport(url.substr(0, idx));
142 std::transform(transport.begin(), transport.end(), transport.begin(), ::tolower);
143 return transport;
144}
145
146void *ROOT::Internal::RRawFile::Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
147{
148 if (!fIsOpen)
149 OpenImpl();
150 fIsOpen = true;
151 return MapImpl(nbytes, offset, mapdOffset);
152}
153
154size_t ROOT::Internal::RRawFile::Read(void *buffer, size_t nbytes)
155{
156 size_t res = ReadAt(buffer, nbytes, fFilePos);
157 fFilePos += res;
158 return res;
159}
160
161size_t ROOT::Internal::RRawFile::ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
162{
163 if (!fIsOpen)
164 OpenImpl();
165 R__ASSERT(fOptions.fBlockSize >= 0);
166 fIsOpen = true;
167
168 // "Large" reads are served directly, bypassing the cache
169 if (nbytes > static_cast<unsigned int>(fOptions.fBlockSize))
170 return ReadAtImpl(buffer, nbytes, offset);
171
172 if (fBufferSpace == nullptr) {
173 fBufferSpace = new unsigned char[kNumBlockBuffers * fOptions.fBlockSize];
174 for (unsigned int i = 0; i < kNumBlockBuffers; ++i)
175 fBlockBuffers[i].fBuffer = fBufferSpace + i * fOptions.fBlockSize;
176 }
177
178 size_t totalBytes = 0;
179 size_t copiedBytes = 0;
180 /// Try to serve as many bytes as possible from the block buffers
181 for (unsigned int idx = fBlockBufferIdx; idx < fBlockBufferIdx + kNumBlockBuffers; ++idx) {
182 copiedBytes = fBlockBuffers[idx % kNumBlockBuffers].CopyTo(buffer, nbytes, offset);
183 buffer = reinterpret_cast<unsigned char *>(buffer) + copiedBytes;
184 nbytes -= copiedBytes;
185 offset += copiedBytes;
186 totalBytes += copiedBytes;
187 if (copiedBytes > 0)
188 fBlockBufferIdx = idx;
189 if (nbytes == 0)
190 return totalBytes;
191 }
192 fBlockBufferIdx++;
193
194 /// The request was not fully satisfied and fBlockBufferIdx now points to the previous shadow buffer
195
196 /// The remaining bytes populate the newly promoted main buffer
197 RBlockBuffer *thisBuffer = &fBlockBuffers[fBlockBufferIdx % kNumBlockBuffers];
198 size_t res = ReadAtImpl(thisBuffer->fBuffer, fOptions.fBlockSize, offset);
199 thisBuffer->fBufferOffset = offset;
200 thisBuffer->fBufferSize = res;
201 size_t remainingBytes = std::min(res, nbytes);
202 memcpy(buffer, thisBuffer->fBuffer, remainingBytes);
203 totalBytes += remainingBytes;
204 return totalBytes;
205}
206
207void ROOT::Internal::RRawFile::ReadV(RIOVec *ioVec, unsigned int nReq)
208{
209 if (!fIsOpen)
210 OpenImpl();
211 fIsOpen = true;
212 ReadVImpl(ioVec, nReq);
213}
214
216{
217 if (fOptions.fLineBreak == ELineBreaks::kAuto) {
218 // Auto-detect line breaks according to the break discovered in the first line
219 fOptions.fLineBreak = ELineBreaks::kUnix;
220 bool res = Readln(line);
221 if ((line.length() > 0) && (*line.rbegin() == '\r')) {
222 fOptions.fLineBreak = ELineBreaks::kWindows;
223 line.resize(line.length() - 1);
224 }
225 return res;
226 }
227
228 line.clear();
229 char buffer[kLineBuffer];
230 size_t nbytes;
231 do {
232 nbytes = Read(buffer, sizeof(buffer));
233 std::string_view bufferView(buffer, nbytes);
234 auto idx = bufferView.find(kLineBreakTokens[static_cast<int>(fOptions.fLineBreak)]);
235 if (idx != std::string_view::npos) {
236 // Line break found, return the string and skip the linebreak itself
237 line.append(buffer, idx);
238 fFilePos -= nbytes - idx;
239 fFilePos += kLineBreakTokenSizes[static_cast<int>(fOptions.fLineBreak)];
240 return true;
241 }
242 line.append(buffer, nbytes);
243 } while (nbytes > 0);
244
245 return !line.empty();
246}
247
248void ROOT::Internal::RRawFile::Seek(std::uint64_t offset)
249{
250 fFilePos = offset;
251}
252
253void ROOT::Internal::RRawFile::Unmap(void *region, size_t nbytes)
254{
255 if (!fIsOpen)
256 throw std::runtime_error("Cannot unmap, file not open");
257 UnmapImpl(region, nbytes);
258}
size_t fSize
std::string fBuffer
std::string kAuto
Definition: RColor.cxx:37
#define h(i)
Definition: RSha256.hxx:106
#define R__ASSERT(e)
Definition: TError.h:118
#define gROOT
Definition: TROOT.h:404
The RRawFileUnix class uses POSIX calls to read from a mounted file system.
The RRawFileWin class uses portable C I/O calls to read from a drive.
Definition: RRawFileWin.hxx:32
The RRawFile provides read-only access to local and remote files.
Definition: RRawFile.hxx:43
unsigned char * fBufferSpace
Memory block containing the block buffers consecutively.
Definition: RRawFile.hxx:106
static std::string GetLocation(std::string_view url)
Returns only the file location, e.g. "server/file" for http://server/file.
Definition: RRawFile.cxx:113
unsigned int fBlockBufferIdx
To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers.
Definition: RRawFile.hxx:102
RRawFile(std::string_view url, ROptions options)
Definition: RRawFile.cxx:61
std::uint64_t fFilePos
The current position in the file, which can be changed by Seek, Read, and Readln.
Definition: RRawFile.hxx:116
virtual void * MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented,...
Definition: RRawFile.cxx:95
void Unmap(void *region, size_t nbytes)
Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping.
Definition: RRawFile.cxx:253
virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq)
By default implemented as a loop of ReadAt calls but can be overridden, e.g. by XRootD or DAVIX implementations.
Definition: RRawFile.cxx:101
static std::string GetTransport(std::string_view url)
Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file.
Definition: RRawFile.cxx:136
std::uint64_t GetSize()
Returns the size of the file.
Definition: RRawFile.cxx:121
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
void Seek(std::uint64_t offset)
Change the cursor fFilePos.
Definition: RRawFile.cxx:248
static constexpr std::uint64_t kUnknownFileSize
Derived classes do not necessarily need to provide file size information but they can return "not kno...
Definition: RRawFile.hxx:46
size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset)
Buffered read from a random position.
Definition: RRawFile.cxx:161
bool fIsOpen
Files are opened lazily and only when required; the open state is kept by this flag.
Definition: RRawFile.hxx:110
bool Readln(std::string &line)
Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
Definition: RRawFile.cxx:215
void ReadV(RIOVec *ioVec, unsigned int nReq)
Opens the file if necessary and calls ReadVImpl.
Definition: RRawFile.cxx:207
size_t Read(void *buffer, size_t nbytes)
Read from fFilePos offset. Returns the actual number of bytes read.
Definition: RRawFile.cxx:154
std::string GetUrl() const
Returns the url of the file.
Definition: RRawFile.cxx:132
std::uint64_t fFileSize
The cached file size.
Definition: RRawFile.hxx:108
virtual void UnmapImpl(void *region, size_t nbytes)
Derived classes with mmap support must be able to unmap the memory area handed out by Map()
Definition: RRawFile.cxx:108
void * Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset)
Memory mapping according to POSIX standard; in particular, new mappings of the same range replace old...
Definition: RRawFile.cxx:146
TLine * line
basic_string_view< char > string_view
auto MapImpl(F &&f, const RVec< T > &... vs) -> RVec< decltype(f(vs[0]...))>
Definition: RVec.hxx:78
std::uint64_t fBufferOffset
Where in the open file does fBuffer start.
Definition: RRawFile.hxx:87
unsigned char * fBuffer
Points into the I/O buffer with data from the file, not owned.
Definition: RRawFile.hxx:91
size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset)
Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
Definition: RRawFile.cxx:46
size_t fBufferSize
The number of currently buffered bytes in fBuffer.
Definition: RRawFile.hxx:89
Used for vector reads from multiple offsets into multiple buffers.
Definition: RRawFile.hxx:71
std::size_t fOutBytes
The number of actually read bytes, set by ReadV()
Definition: RRawFile.hxx:79
On construction, an ROptions parameter can customize the RRawFile behavior.
Definition: RRawFile.hxx:59