Logo ROOT  
Reference Guide
RSqliteDS.cxx
Go to the documentation of this file.
1// Author: Jakob Blomer CERN 07/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11
12#include <ROOT/RSqliteDS.hxx>
13#include <ROOT/RConfig.hxx>
14#include <ROOT/RDF/Utils.hxx>
15#include <ROOT/RRawFile.hxx>
16
17#include "TError.h"
18#include "TRandom.h"
19#include "TSystem.h"
20
21#include <algorithm>
22#include <cctype>
23#include <cerrno>
24#include <cstring> // for memcpy
25#include <ctime>
26#include <memory> // for placement new
27#include <stdexcept>
28#include <utility>
29
30#include <sqlite3.h>
31
32namespace {
33
34// In order to provide direct access to remote sqlite files through HTTP and HTTPS, this datasource provides a custom
35// "SQlite VFS module" that uses Davix for data access. The SQlite VFS modules are roughly what TSystem is
36// for ROOT -- an abstraction of the operating system interface.
37//
38// SQlite allows for registering custom VFS modules, which are a set of C callback functions that SQlite invokes when
39// it needs to read from a file, write to a file, etc. More information is available under https://sqlite.org/vfs.html
40//
41// In the context of a data source, SQlite will only ever call reading functions from the VFS module, the sqlite
42// files are not modified. Therefore, only a subset of the callback functions provide a non-trivial implementation.
43// The custom VFS module uses a RRawFile for the byte access, thereby it can access local and remote files.
44
////////////////////////////////////////////////////////////////////////////
/// Name under which the read-only VFS module is registered with SQlite. VFS modules are looked up by
/// their string name, so this name must not clash with any other VFS registered in the application.
constexpr const char *gSQliteVfsName = "ROOT-Davix-readonly";
48
49////////////////////////////////////////////////////////////////////////////
50/// Holds the state of an open sqlite database. Objects of this struct are created in VfsRdOnlyOpen()
51/// and then passed by sqlite to the file I/O callbacks (read, close, etc.). This uses C style inheritance
52/// where the struct starts with a sqlite3_file member (base class) which is extended by members related to
53/// this particular VFS module. Every callback here thus casts the sqlite3_file input parameter to its "derived"
54/// type VfsRootFile.
55struct VfsRootFile {
56 VfsRootFile() = default;
57
58 sqlite3_file pFile;
59 std::unique_ptr<ROOT::Internal::RRawFile> fRawFile;
60};
61
62// The following callbacks implement the I/O operations of an open database
63
64////////////////////////////////////////////////////////////////////////////
65/// Releases the resources associated to a file opened with davix
66int VfsRdOnlyClose(sqlite3_file *pFile)
67{
68 VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
69 // We can't use delete because the storage for p is managed by sqlite
70 p->~VfsRootFile();
71 return SQLITE_OK;
72}
73
74////////////////////////////////////////////////////////////////////////////
75/// Issues a byte range request for a chunk to the raw file
76int VfsRdOnlyRead(sqlite3_file *pFile, void *zBuf, int count, sqlite_int64 offset)
77{
78 VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
79 auto nbytes = p->fRawFile->ReadAt(zBuf, count, offset);
80 return (nbytes != static_cast<unsigned int>(count)) ? SQLITE_IOERR : SQLITE_OK;
81}
82
83////////////////////////////////////////////////////////////////////////////
84/// We do not write to a database in the RDataSource and therefore can simply return an error for this callback
85int VfsRdOnlyWrite(sqlite3_file * /*pFile*/, const void * /*zBuf*/, int /*iAmt*/, sqlite_int64 /*iOfst*/)
86{
87 return SQLITE_OPEN_READONLY;
88}
89
90////////////////////////////////////////////////////////////////////////////
91/// We do not write to a database in the RDataSource and therefore can simply return an error for this callback
92int VfsRdOnlyTruncate(sqlite3_file * /*pFile*/, sqlite_int64 /*size*/)
93{
94 return SQLITE_OPEN_READONLY;
95}
96
97////////////////////////////////////////////////////////////////////////////
98/// As the database is read-only, syncing data to disc is a no-op and always succeeds
99int VfsRdOnlySync(sqlite3_file * /*pFile*/, int /*flags*/)
100{
101 return SQLITE_OK;
102}
103
104////////////////////////////////////////////////////////////////////////////
105/// Returns the cached file size
106int VfsRdOnlyFileSize(sqlite3_file *pFile, sqlite_int64 *pSize)
107{
108 VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
109 *pSize = p->fRawFile->GetSize();
110 return SQLITE_OK;
111}
112
113////////////////////////////////////////////////////////////////////////////
114/// As the database is read-only, locks for concurrent access are no-ops and always succeeds
115int VfsRdOnlyLock(sqlite3_file * /*pFile*/, int /*level*/)
116{
117 return SQLITE_OK;
118}
119
120////////////////////////////////////////////////////////////////////////////
121/// As the database is read-only, locks for concurrent access are no-ops and always succeeds
122int VfsRdOnlyUnlock(sqlite3_file * /*pFile*/, int /*level*/)
123{
124 return SQLITE_OK;
125}
126
127////////////////////////////////////////////////////////////////////////////
128/// As the database is read-only, locks for concurrent access are no-ops and always succeeds
129int VfsRdOnlyCheckReservedLock(sqlite3_file * /*pFile*/, int *pResOut)
130{
131 *pResOut = 0;
132 return SQLITE_OK;
133}
134
135////////////////////////////////////////////////////////////////////////////
136/// As the database is read-only, we know there are no additional control files such as a database journal
137int VfsRdOnlyFileControl(sqlite3_file * /*p*/, int /*op*/, void * /*pArg*/)
138{
139 return SQLITE_NOTFOUND;
140}
141
142////////////////////////////////////////////////////////////////////////////
143/// The database device's sector size is only needed for writing
144int VfsRdOnlySectorSize(sqlite3_file * /*pFile*/)
145{
146 return SQLITE_OPEN_READONLY;
147}
148
149////////////////////////////////////////////////////////////////////////////
150/// The database device's properties are only needed for writing
151int VfsRdOnlyDeviceCharacteristics(sqlite3_file * /*pFile*/)
152{
153 return SQLITE_OPEN_READONLY;
154}
155
156////////////////////////////////////////////////////////////////////////////
157/// Set the function pointers of the custom VFS I/O operations in a
158/// forward-compatible way
159static sqlite3_io_methods GetSqlite3IoMethods()
160{
161 // The C style initialization is compatible with version 1 and later versions of the struct.
162 // Version 1 was introduced with sqlite 3.6, version 2 with sqlite 3.7.8, version 3 with sqlite 3.7.17
163 sqlite3_io_methods io_methods;
164 memset(&io_methods, 0, sizeof(io_methods));
165 io_methods.iVersion = 1;
166 io_methods.xClose = VfsRdOnlyClose;
167 io_methods.xRead = VfsRdOnlyRead;
168 io_methods.xWrite = VfsRdOnlyWrite;
169 io_methods.xTruncate = VfsRdOnlyTruncate;
170 io_methods.xSync = VfsRdOnlySync;
171 io_methods.xFileSize = VfsRdOnlyFileSize;
172 io_methods.xLock = VfsRdOnlyLock;
173 io_methods.xUnlock = VfsRdOnlyUnlock;
174 io_methods.xCheckReservedLock = VfsRdOnlyCheckReservedLock;
175 io_methods.xFileControl = VfsRdOnlyFileControl;
176 io_methods.xSectorSize = VfsRdOnlySectorSize;
177 io_methods.xDeviceCharacteristics = VfsRdOnlyDeviceCharacteristics;
178 return io_methods;
179}
180
181////////////////////////////////////////////////////////////////////////////
182/// Fills a new VfsRootFile struct enclosing a Davix file
183int VfsRdOnlyOpen(sqlite3_vfs * /*vfs*/, const char *zName, sqlite3_file *pFile, int flags, int * /*pOutFlags*/)
184{
185 // Storage for the VfsRootFile structure has been already allocated by sqlite, so we use placement new
186 VfsRootFile *p = new (pFile) VfsRootFile();
187 p->pFile.pMethods = nullptr;
188
189 // This global struct contains the function pointers to all the callback operations that act on an open database.
190 // It is passed via the pFile struct back to sqlite so that it can call back to the functions provided above.
191 static const sqlite3_io_methods io_methods = GetSqlite3IoMethods();
192
193 if (flags & (SQLITE_OPEN_READWRITE | SQLITE_OPEN_DELETEONCLOSE | SQLITE_OPEN_EXCLUSIVE))
194 return SQLITE_IOERR;
195
196 p->fRawFile = ROOT::Internal::RRawFile::Create(zName);
197 if (!p->fRawFile) {
198 ::Error("VfsRdOnlyOpen", "Cannot open %s\n", zName);
199 return SQLITE_IOERR;
200 }
201
202 if (!(p->fRawFile->GetFeatures() & ROOT::Internal::RRawFile::kFeatureHasSize)) {
203 ::Error("VfsRdOnlyOpen", "cannot determine file size of %s\n", zName);
204 return SQLITE_IOERR;
205 }
206
207 p->pFile.pMethods = &io_methods;
208 return SQLITE_OK;
209}
210
211// The following callbacks implement operating system specific functionality. In contrast to the previous callbacks,
212// there is no need to implement any customized logic for the following ones. An implementation has to be
213// provided nevertheless to have a fully functional VFS module.
214
215////////////////////////////////////////////////////////////////////////////
216/// This VFS module cannot remove files
217int VfsRdOnlyDelete(sqlite3_vfs * /*vfs*/, const char * /*zName*/, int /*syncDir*/)
218{
219 return SQLITE_IOERR_DELETE;
220}
221
222////////////////////////////////////////////////////////////////////////////
223/// Access control always allows read-only access to databases
224int VfsRdOnlyAccess(sqlite3_vfs * /*vfs*/, const char * /*zPath*/, int flags, int *pResOut)
225{
226 *pResOut = 0;
227 if (flags == SQLITE_ACCESS_READWRITE) {
228 return SQLITE_OPEN_READONLY;
229 }
230 return SQLITE_OK;
231}
232
233////////////////////////////////////////////////////////////////////////////
234/// No distinction between relative and full paths for URLs, returns the input path name
235int VfsRdOnlyFullPathname(sqlite3_vfs * /*vfs*/, const char *zPath, int nOut, char *zOut)
236{
237 zOut[nOut - 1] = '\0';
238 sqlite3_snprintf(nOut, zOut, "%s", zPath);
239 return SQLITE_OK;
240}
241
242////////////////////////////////////////////////////////////////////////////
243/// Let TRandom fill the buffer with random bytes
244int VfsRdOnlyRandomness(sqlite3_vfs * /*vfs*/, int nBuf, char *zBuf)
245{
246 for (int i = 0; i < nBuf; ++i) {
247 zBuf[i] = (char)gRandom->Integer(256);
248 }
249 return nBuf;
250}
251
252////////////////////////////////////////////////////////////////////////////
253/// Use ROOT's platform independent sleep wrapper
254int VfsRdOnlySleep(sqlite3_vfs * /*vfs*/, int microseconds)
255{
256 // Millisecond precision but sleep at least number of given microseconds as requested
257 gSystem->Sleep((microseconds + 1000 - 1) / 1000);
258 return microseconds;
259}
260
261////////////////////////////////////////////////////////////////////////////
262/// Use sqlite default implementation
263int VfsRdOnlyGetLastError(sqlite3_vfs * /*vfs*/, int /*not_used1*/, char * /*not_used2*/)
264{
265 return errno;
266}
267
268////////////////////////////////////////////////////////////////////////////
269/// Return UTC as being done in the sqlite unix VFS without gettimeofday()
270int VfsRdOnlyCurrentTimeInt64(sqlite3_vfs * /*vfs*/, sqlite3_int64 *piNow)
271{
272 static constexpr sqlite3_int64 unixEpoch = 24405875 * (sqlite3_int64)8640000;
273 time_t t;
274 time(&t);
275 *piNow = ((sqlite3_int64)t) * 1000 + unixEpoch;
276 return SQLITE_OK;
277}
278
279////////////////////////////////////////////////////////////////////////////
280/// Wrapper around VfsRdOnlyCurrentTimeInt64
281int VfsRdOnlyCurrentTime(sqlite3_vfs *vfs, double *prNow)
282{
283 sqlite3_int64 i = 0;
284 int rc = VfsRdOnlyCurrentTimeInt64(vfs, &i);
285 *prNow = i / 86400000.0;
286 return rc;
287}
288
289////////////////////////////////////////////////////////////////////////////
290/// Set the function pointers of the VFS implementation in a
291/// forward-compatible way
292static sqlite3_vfs GetSqlite3Vfs()
293{
294 // The C style initialization is compatible with version 1 and later versions of the struct.
295 // Version 1 was introduced with sqlite 3.5, version 2 with sqlite 3.7, version 3 with sqlite 3.7.6
296 sqlite3_vfs vfs;
297 memset(&vfs, 0, sizeof(vfs));
298 vfs.iVersion = 1;
299 vfs.szOsFile = sizeof(VfsRootFile);
300 vfs.mxPathname = 2000;
301 vfs.zName = gSQliteVfsName;
302 vfs.xOpen = VfsRdOnlyOpen;
303 vfs.xDelete = VfsRdOnlyDelete;
304 vfs.xAccess = VfsRdOnlyAccess;
305 vfs.xFullPathname = VfsRdOnlyFullPathname;
306 vfs.xRandomness = VfsRdOnlyRandomness;
307 vfs.xSleep = VfsRdOnlySleep;
308 vfs.xCurrentTime = VfsRdOnlyCurrentTime;
309 vfs.xGetLastError = VfsRdOnlyGetLastError;
310 return vfs;
311}
312
313////////////////////////////////////////////////////////////////////////////
314/// A global struct of function pointers and details on the VfsRootFile class that together constitue a VFS module
315static struct sqlite3_vfs kSqlite3Vfs = GetSqlite3Vfs();
316
317static bool RegisterSqliteVfs()
318{
319 int retval;
320 retval = sqlite3_vfs_register(&kSqlite3Vfs, false);
321 return (retval == SQLITE_OK);
322}
323
324} // anonymous namespace
325
326namespace ROOT {
327
328namespace RDF {
329
330namespace Internal {
331////////////////////////////////////////////////////////////////////////////
332/// The state of an open dataset in terms of the sqlite3 C library.
334 sqlite3 *fDb = nullptr;
335 sqlite3_stmt *fQuery = nullptr;
336};
337}
338
340 : fType(type), fIsActive(false), fInteger(0), fReal(0.0), fText(), fBlob(), fNull(nullptr)
341{
342 switch (type) {
343 case ETypes::kInteger: fPtr = &fInteger; break;
344 case ETypes::kReal: fPtr = &fReal; break;
345 case ETypes::kText: fPtr = &fText; break;
346 case ETypes::kBlob: fPtr = &fBlob; break;
347 case ETypes::kNull: fPtr = &fNull; break;
348 default: throw std::runtime_error("Internal error");
349 }
350}
351
352constexpr char const *RSqliteDS::fgTypeNames[];
353
354////////////////////////////////////////////////////////////////////////////
355/// \brief Build the dataframe
356/// \param[in] fileName The path to an sqlite3 file, will be opened read-only
357/// \param[in] query A valid sqlite3 SELECT query
358///
359/// The constructor opens the sqlite file, prepares the query engine and determines the column names and types.
360RSqliteDS::RSqliteDS(const std::string &fileName, const std::string &query)
361 : fDataSet(std::make_unique<Internal::RSqliteDSDataSet>()), fNSlots(0), fNRow(0)
362{
363 static bool hasSqliteVfs = RegisterSqliteVfs();
364 if (!hasSqliteVfs)
365 throw std::runtime_error("Cannot register SQlite VFS in RSqliteDS");
366
367 int retval;
368
369 retval = sqlite3_open_v2(fileName.c_str(), &fDataSet->fDb, SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX,
370 gSQliteVfsName);
371 if (retval != SQLITE_OK)
372 SqliteError(retval);
373
374 retval = sqlite3_prepare_v2(fDataSet->fDb, query.c_str(), -1, &fDataSet->fQuery, nullptr);
375 if (retval != SQLITE_OK)
376 SqliteError(retval);
377
378 int colCount = sqlite3_column_count(fDataSet->fQuery);
379 retval = sqlite3_step(fDataSet->fQuery);
380 if ((retval != SQLITE_ROW) && (retval != SQLITE_DONE))
381 SqliteError(retval);
382
383 fValues.reserve(colCount);
384 for (int i = 0; i < colCount; ++i) {
385 fColumnNames.emplace_back(sqlite3_column_name(fDataSet->fQuery, i));
386 int type = SQLITE_NULL;
387 // Try first with the declared column type and then with the dynamic type
388 // for expressions
389 const char *declTypeCstr = sqlite3_column_decltype(fDataSet->fQuery, i);
390 if (declTypeCstr == nullptr) {
391 if (retval == SQLITE_ROW)
392 type = sqlite3_column_type(fDataSet->fQuery, i);
393 } else {
394 std::string declType(declTypeCstr);
395 std::transform(declType.begin(), declType.end(), declType.begin(), ::toupper);
396 if (declType == "INTEGER")
397 type = SQLITE_INTEGER;
398 else if (declType == "FLOAT")
399 type = SQLITE_FLOAT;
400 else if (declType == "TEXT")
401 type = SQLITE_TEXT;
402 else if (declType == "BLOB")
403 type = SQLITE_BLOB;
404 else
405 throw std::runtime_error("Unexpected column decl type");
406 }
407
408 switch (type) {
409 case SQLITE_INTEGER:
411 fValues.emplace_back(ETypes::kInteger);
412 break;
413 case SQLITE_FLOAT:
414 fColumnTypes.push_back(ETypes::kReal);
415 fValues.emplace_back(ETypes::kReal);
416 break;
417 case SQLITE_TEXT:
418 fColumnTypes.push_back(ETypes::kText);
419 fValues.emplace_back(ETypes::kText);
420 break;
421 case SQLITE_BLOB:
422 fColumnTypes.push_back(ETypes::kBlob);
423 fValues.emplace_back(ETypes::kBlob);
424 break;
425 case SQLITE_NULL:
426 // TODO: Null values in first rows are not well handled
427 fColumnTypes.push_back(ETypes::kNull);
428 fValues.emplace_back(ETypes::kNull);
429 break;
430 default: throw std::runtime_error("Unhandled data type");
431 }
432 }
433}
434
435////////////////////////////////////////////////////////////////////////////
436/// Frees the sqlite resources and closes the file.
438{
439 // sqlite3_finalize returns the error code of the most recent operation on fQuery.
440 sqlite3_finalize(fDataSet->fQuery);
441 // Closing can possibly fail with SQLITE_BUSY, in which case resources are leaked. This should not happen
442 // the way it is used in this class because we cleanup the prepared statement before.
443 sqlite3_close(fDataSet->fDb);
444}
445
446////////////////////////////////////////////////////////////////////////////
447/// Returns the SELECT queries names. The column names have been cached in the constructor.
448/// For expressions, the column name is the string of the expression unless the query defines a column name with as
449/// like in "SELECT 1 + 1 as mycolumn FROM table"
450const std::vector<std::string> &RSqliteDS::GetColumnNames() const
451{
452 return fColumnNames;
453}
454
455////////////////////////////////////////////////////////////////////////////
456/// Activates the given column's result value.
458{
459 const auto index = std::distance(fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), name));
460 const auto type = fColumnTypes[index];
461
462 if ((type == ETypes::kInteger && typeid(Long64_t) != ti) || (type == ETypes::kReal && typeid(double) != ti) ||
463 (type == ETypes::kText && typeid(std::string) != ti) ||
464 (type == ETypes::kBlob && typeid(std::vector<unsigned char>) != ti) ||
465 (type == ETypes::kNull && typeid(void *) != ti)) {
466 std::string errmsg = "The type selected for column \"";
467 errmsg += name;
468 errmsg += "\" does not correspond to column type, which is ";
469 errmsg += GetTypeName(name);
470 throw std::runtime_error(errmsg);
471 }
472
473 fValues[index].fIsActive = true;
474 return std::vector<void *>{fNSlots, &fValues[index].fPtr};
475}
476
477////////////////////////////////////////////////////////////////////////////
478/// Returns a range of size 1 as long as more rows are available in the SQL result set.
479/// This inherently serialized the RDF independent of the number of slots.
480std::vector<std::pair<ULong64_t, ULong64_t>> RSqliteDS::GetEntryRanges()
481{
482 std::vector<std::pair<ULong64_t, ULong64_t>> entryRanges;
483 int retval = sqlite3_step(fDataSet->fQuery);
484 switch (retval) {
485 case SQLITE_DONE: return entryRanges;
486 case SQLITE_ROW:
487 entryRanges.emplace_back(fNRow, fNRow + 1);
488 fNRow++;
489 return entryRanges;
490 default:
491 SqliteError(retval);
492 // Never here
493 abort();
494 }
495}
496
497////////////////////////////////////////////////////////////////////////////
498/// Returns the C++ type for a given column name, implemented as a linear search through all the columns.
499std::string RSqliteDS::GetTypeName(std::string_view colName) const
500{
501 unsigned N = fColumnNames.size();
502
503 for (unsigned i = 0; i < N; ++i) {
504 if (colName == fColumnNames[i]) {
505 return fgTypeNames[static_cast<int>(fColumnTypes[i])];
506 }
507 }
508 throw std::runtime_error("Unknown column: " + std::string(colName));
509}
510
511////////////////////////////////////////////////////////////////////////////
512/// A linear search through the columns for the given name
514{
515 return std::find(fColumnNames.begin(), fColumnNames.end(), colName) != fColumnNames.end();
516}
517
518////////////////////////////////////////////////////////////////////////////
519/// Resets the SQlite query engine at the beginning of the event loop.
521{
522 fNRow = 0;
523 int retval = sqlite3_reset(fDataSet->fQuery);
524 if (retval != SQLITE_OK)
525 throw std::runtime_error("SQlite error, reset");
526}
527
529{
530 return "RSqliteDS";
531}
532
533////////////////////////////////////////////////////////////////////////////////////////////////
534/// \brief Factory method to create a SQlite RDataFrame.
535/// \param[in] fileName Path of the sqlite file.
536/// \param[in] query SQL query that defines the data set.
538{
539 ROOT::RDataFrame rdf(std::make_unique<RSqliteDS>(std::string(fileName), std::string(query)));
540 return rdf;
541}
542
543////////////////////////////////////////////////////////////////////////////
544/// Stores the result of the current active sqlite query row as a C++ value.
545bool RSqliteDS::SetEntry(unsigned int /* slot */, ULong64_t entry)
546{
547 R__ASSERT(entry + 1 == fNRow);
548 unsigned N = fValues.size();
549 for (unsigned i = 0; i < N; ++i) {
550 if (!fValues[i].fIsActive)
551 continue;
552
553 int nbytes;
554 switch (fValues[i].fType) {
555 case ETypes::kInteger: fValues[i].fInteger = sqlite3_column_int64(fDataSet->fQuery, i); break;
556 case ETypes::kReal: fValues[i].fReal = sqlite3_column_double(fDataSet->fQuery, i); break;
557 case ETypes::kText:
558 nbytes = sqlite3_column_bytes(fDataSet->fQuery, i);
559 if (nbytes == 0) {
560 fValues[i].fText = "";
561 } else {
562 fValues[i].fText = reinterpret_cast<const char *>(sqlite3_column_text(fDataSet->fQuery, i));
563 }
564 break;
565 case ETypes::kBlob:
566 nbytes = sqlite3_column_bytes(fDataSet->fQuery, i);
567 fValues[i].fBlob.resize(nbytes);
568 if (nbytes > 0) {
569 std::memcpy(fValues[i].fBlob.data(), sqlite3_column_blob(fDataSet->fQuery, i), nbytes);
570 }
571 break;
572 case ETypes::kNull: break;
573 default: throw std::runtime_error("Unhandled column type");
574 }
575 }
576 return true;
577}
578
579////////////////////////////////////////////////////////////////////////////////////////////////
580/// Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
581void RSqliteDS::SetNSlots(unsigned int nSlots)
582{
583 if (nSlots > 1) {
584 ::Warning("SetNSlots", "Currently the SQlite data source faces performance degradation in multi-threaded mode. "
585 "Consider turning off IMT.");
586 }
587 fNSlots = nSlots;
588}
589
590////////////////////////////////////////////////////////////////////////////////////////////////
591/// Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
592void RSqliteDS::SqliteError(int errcode)
593{
594 std::string errmsg = "SQlite error: ";
595#if SQLITE_VERSION_NUMBER < 3007015
596 errmsg += std::to_string(errcode);
597#else
598 errmsg += sqlite3_errstr(errcode);
599#endif
600 throw std::runtime_error(errmsg);
601}
602
603} // namespace RDF
604
605} // namespace ROOT
#define R__ASSERT(e)
Definition: TError.h:118
#define N
int type
Definition: TGX11.cxx:121
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:52
std::vector< void * > Record_t
void SetNSlots(unsigned int nSlots) final
Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
Definition: RSqliteDS.cxx:581
static constexpr char const * fgTypeNames[]
Corresponds to the types defined in ETypes.
Definition: RSqliteDS.hxx:91
std::string GetLabel() final
Return a string representation of the datasource type.
Definition: RSqliteDS.cxx:528
void Initialise() final
Resets the SQlite query engine at the beginning of the event loop.
Definition: RSqliteDS.cxx:520
unsigned int fNSlots
Definition: RSqliteDS.hxx:82
std::vector< std::string > fColumnNames
Definition: RSqliteDS.hxx:84
~RSqliteDS()
Frees the sqlite resources and closes the file.
Definition: RSqliteDS.cxx:437
bool HasColumn(std::string_view colName) const final
A linear search through the columns for the given name.
Definition: RSqliteDS.cxx:513
std::vector< ETypes > fColumnTypes
Definition: RSqliteDS.hxx:85
std::string GetTypeName(std::string_view colName) const final
Returns the C++ type for a given column name, implemented as a linear search through all the columns.
Definition: RSqliteDS.cxx:499
ETypes
All the types known to SQlite. Changes require changing fgTypeNames, too.
Definition: RSqliteDS.hxx:56
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
Activates the given column's result value.
Definition: RSqliteDS.cxx:457
RSqliteDS(const std::string &fileName, const std::string &query)
Build the dataframe.
Definition: RSqliteDS.cxx:360
std::unique_ptr< Internal::RSqliteDSDataSet > fDataSet
Definition: RSqliteDS.hxx:81
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Returns a range of size 1 as long as more rows are available in the SQL result set.
Definition: RSqliteDS.cxx:480
const std::vector< std::string > & GetColumnNames() const final
Returns the SELECT queries names.
Definition: RSqliteDS.cxx:450
bool SetEntry(unsigned int slot, ULong64_t entry) final
Stores the result of the current active sqlite query row as a C++ value.
Definition: RSqliteDS.cxx:545
void SqliteError(int errcode)
Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
Definition: RSqliteDS.cxx:592
std::vector< Value_t > fValues
The data source is inherently single-threaded and returns only one row at a time. This vector holds t...
Definition: RSqliteDS.hxx:87
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:42
virtual UInt_t Integer(UInt_t imax)
Returns a random integer uniformly distributed on the interval [ 0, imax-1 ].
Definition: TRandom.cxx:360
virtual void Sleep(UInt_t milliSec)
Sleep milliSec milli seconds.
Definition: TSystem.cxx:440
long long Long64_t
Definition: cpp_cppyy.h:13
unsigned long long ULong64_t
Definition: cpp_cppyy.h:14
basic_string_view< char > string_view
RDataFrame MakeSqliteDataFrame(std::string_view fileName, std::string_view query)
Factory method to create a SQlite RDataFrame.
Definition: RSqliteDS.cxx:537
static const std::string name("name")
void Error(const char *location, const char *va_(fmt),...)
Definition: TClingUtils.h:789
void Warning(const char *location, const char *va_(fmt),...)
Definition: TClingUtils.h:819
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: RNumpyDS.hxx:30
The state of an open dataset in terms of the sqlite3 C library.
Definition: RSqliteDS.cxx:333
void * fPtr
Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
Definition: RSqliteDS.hxx:76
std::vector< unsigned char > fBlob
Definition: RSqliteDS.hxx:74