Logo ROOT  
Reference Guide
RSqliteDS.cxx
Go to the documentation of this file.
1 // Author: Jakob Blomer CERN 07/2018
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 // clang-format off
12 /** \class ROOT::RDF::RSqliteDS
13  \ingroup dataframe
14  \brief RDataFrame data source class for reading SQlite files.
15 */
16 
17 // clang-format on
18 
19 #include <ROOT/RSqliteDS.hxx>
20 #include <ROOT/RConfig.hxx>
21 #include <ROOT/RDF/Utils.hxx>
22 #include <ROOT/RMakeUnique.hxx>
23 #include <ROOT/RRawFile.hxx>
24 
25 #include "TError.h"
26 #include "TRandom.h"
27 #include "TSystem.h"
28 
29 #include <algorithm>
30 #include <cctype>
31 #include <cerrno>
32 #include <cstring> // for memcpy
33 #include <ctime>
34 #include <memory> // for placement new
35 #include <stdexcept>
36 #include <utility>
37 
38 #include <sqlite3.h>
39 
40 namespace {
41 
42 // In order to provide direct access to remote sqlite files through HTTP and HTTPS, this datasource provides a custom
43 // "SQlite VFS module" that uses Davix for data access. The SQlite VFS modules are roughly what TSystem is
44 // for ROOT -- an abstraction of the operating system interface.
45 //
46 // SQlite allows for registering custom VFS modules, which are a set of C callback functions that SQlite invokes when
47 // it needs to read from a file, write to a file, etc. More information is available under https://sqlite.org/vfs.html
48 //
49 // In the context of a data source, SQlite will only ever call reading functions from the VFS module, the sqlite
50 // files are not modified. Therefore, only a subset of the callback functions provide a non-trivial implementation.
51 // The custom VFS module uses a RRawFile for the byte access, thereby it can access local and remote files.
52 
////////////////////////////////////////////////////////////////////////////
/// Name under which the custom read-only VFS module is registered with SQlite. VFS modules are looked up by
/// their string name, so this name must not be used by any other VFS module in the application.
constexpr const char *gSQliteVfsName = "ROOT-Davix-readonly";
56 
57 ////////////////////////////////////////////////////////////////////////////
58 /// Holds the state of an open sqlite database. Objects of this struct are created in VfsRdOnlyOpen()
59 /// and then passed by sqlite to the file I/O callbacks (read, close, etc.). This uses C style inheritance
60 /// where the struct starts with a sqlite3_file member (base class) which is extended by members related to
61 /// this particular VFS module. Every callback here thus casts the sqlite3_file input parameter to its "derived"
62 /// type VfsRootFile.
63 struct VfsRootFile {
64  VfsRootFile() = default;
65 
66  sqlite3_file pFile;
67  std::unique_ptr<ROOT::Internal::RRawFile> fRawFile;
68 };
69 
70 // The following callbacks implement the I/O operations of an open database
71 
72 ////////////////////////////////////////////////////////////////////////////
73 /// Releases the resources associated to a file opened with davix
74 int VfsRdOnlyClose(sqlite3_file *pFile)
75 {
76  VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
77  // We can't use delete because the storage for p is managed by sqlite
78  p->~VfsRootFile();
79  return SQLITE_OK;
80 }
81 
82 ////////////////////////////////////////////////////////////////////////////
83 /// Issues a byte range request for a chunk to the raw file
84 int VfsRdOnlyRead(sqlite3_file *pFile, void *zBuf, int count, sqlite_int64 offset)
85 {
86  VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
87  auto nbytes = p->fRawFile->ReadAt(zBuf, count, offset);
88  return (nbytes != static_cast<unsigned int>(count)) ? SQLITE_IOERR : SQLITE_OK;
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////
92 /// We do not write to a database in the RDataSource and therefore can simply return an error for this callback
93 int VfsRdOnlyWrite(sqlite3_file * /*pFile*/, const void * /*zBuf*/, int /*iAmt*/, sqlite_int64 /*iOfst*/)
94 {
95  return SQLITE_OPEN_READONLY;
96 }
97 
98 ////////////////////////////////////////////////////////////////////////////
99 /// We do not write to a database in the RDataSource and therefore can simply return an error for this callback
100 int VfsRdOnlyTruncate(sqlite3_file * /*pFile*/, sqlite_int64 /*size*/)
101 {
102  return SQLITE_OPEN_READONLY;
103 }
104 
105 ////////////////////////////////////////////////////////////////////////////
106 /// As the database is read-only, syncing data to disc is a no-op and always succeeds
107 int VfsRdOnlySync(sqlite3_file * /*pFile*/, int /*flags*/)
108 {
109  return SQLITE_OK;
110 }
111 
112 ////////////////////////////////////////////////////////////////////////////
113 /// Returns the cached file size
114 int VfsRdOnlyFileSize(sqlite3_file *pFile, sqlite_int64 *pSize)
115 {
116  VfsRootFile *p = reinterpret_cast<VfsRootFile *>(pFile);
117  *pSize = p->fRawFile->GetSize();
118  return SQLITE_OK;
119 }
120 
121 ////////////////////////////////////////////////////////////////////////////
122 /// As the database is read-only, locks for concurrent access are no-ops and always succeeds
123 int VfsRdOnlyLock(sqlite3_file * /*pFile*/, int /*level*/)
124 {
125  return SQLITE_OK;
126 }
127 
128 ////////////////////////////////////////////////////////////////////////////
129 /// As the database is read-only, locks for concurrent access are no-ops and always succeeds
130 int VfsRdOnlyUnlock(sqlite3_file * /*pFile*/, int /*level*/)
131 {
132  return SQLITE_OK;
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////
136 /// As the database is read-only, locks for concurrent access are no-ops and always succeeds
137 int VfsRdOnlyCheckReservedLock(sqlite3_file * /*pFile*/, int *pResOut)
138 {
139  *pResOut = 0;
140  return SQLITE_OK;
141 }
142 
143 ////////////////////////////////////////////////////////////////////////////
144 /// As the database is read-only, we know there are no additional control files such as a database journal
145 int VfsRdOnlyFileControl(sqlite3_file * /*p*/, int /*op*/, void * /*pArg*/)
146 {
147  return SQLITE_NOTFOUND;
148 }
149 
150 ////////////////////////////////////////////////////////////////////////////
151 /// The database device's sector size is only needed for writing
152 int VfsRdOnlySectorSize(sqlite3_file * /*pFile*/)
153 {
154  return SQLITE_OPEN_READONLY;
155 }
156 
157 ////////////////////////////////////////////////////////////////////////////
158 /// The database device's properties are only needed for writing
159 int VfsRdOnlyDeviceCharacteristics(sqlite3_file * /*pFile*/)
160 {
161  return SQLITE_OPEN_READONLY;
162 }
163 
164 ////////////////////////////////////////////////////////////////////////////
165 /// Set the function pointers of the custom VFS I/O operations in a
166 /// forward-compatible way
167 static sqlite3_io_methods GetSqlite3IoMethods()
168 {
169  // The C style initialization is compatible with version 1 and later versions of the struct.
170  // Version 1 was introduced with sqlite 3.6, version 2 with sqlite 3.7.8, version 3 with sqlite 3.7.17
171  sqlite3_io_methods io_methods;
172  memset(&io_methods, 0, sizeof(io_methods));
173  io_methods.iVersion = 1;
174  io_methods.xClose = VfsRdOnlyClose;
175  io_methods.xRead = VfsRdOnlyRead;
176  io_methods.xWrite = VfsRdOnlyWrite;
177  io_methods.xTruncate = VfsRdOnlyTruncate;
178  io_methods.xSync = VfsRdOnlySync;
179  io_methods.xFileSize = VfsRdOnlyFileSize;
180  io_methods.xLock = VfsRdOnlyLock;
181  io_methods.xUnlock = VfsRdOnlyUnlock;
182  io_methods.xCheckReservedLock = VfsRdOnlyCheckReservedLock;
183  io_methods.xFileControl = VfsRdOnlyFileControl;
184  io_methods.xSectorSize = VfsRdOnlySectorSize;
185  io_methods.xDeviceCharacteristics = VfsRdOnlyDeviceCharacteristics;
186  return io_methods;
187 }
188 
189 ////////////////////////////////////////////////////////////////////////////
190 /// Fills a new VfsRootFile struct enclosing a Davix file
191 int VfsRdOnlyOpen(sqlite3_vfs * /*vfs*/, const char *zName, sqlite3_file *pFile, int flags, int * /*pOutFlags*/)
192 {
193  // Storage for the VfsRootFile structure has been already allocated by sqlite, so we use placement new
194  VfsRootFile *p = new (pFile) VfsRootFile();
195  p->pFile.pMethods = nullptr;
196 
197  // This global struct contains the function pointers to all the callback operations that act on an open database.
198  // It is passed via the pFile struct back to sqlite so that it can call back to the functions provided above.
199  static const sqlite3_io_methods io_methods = GetSqlite3IoMethods();
200 
201  if (flags & (SQLITE_OPEN_READWRITE | SQLITE_OPEN_DELETEONCLOSE | SQLITE_OPEN_EXCLUSIVE))
202  return SQLITE_IOERR;
203 
204  p->fRawFile = ROOT::Internal::RRawFile::Create(zName);
205  if (!p->fRawFile) {
206  ::Error("VfsRdOnlyOpen", "Cannot open %s\n", zName);
207  return SQLITE_IOERR;
208  }
209 
210  if (!(p->fRawFile->GetFeatures() & ROOT::Internal::RRawFile::kFeatureHasSize)) {
211  ::Error("VfsRdOnlyOpen", "cannot determine file size of %s\n", zName);
212  return SQLITE_IOERR;
213  }
214 
215  p->pFile.pMethods = &io_methods;
216  return SQLITE_OK;
217 }
218 
219 // The following callbacks implement operating system specific functionality. In contrast to the previous callbacks,
220 // there is no need to implement any customized logic for the following ones. An implementation has to be
221 // provided nevertheless to have a fully functional VFS module.
222 
223 ////////////////////////////////////////////////////////////////////////////
224 /// This VFS module cannot remove files
225 int VfsRdOnlyDelete(sqlite3_vfs * /*vfs*/, const char * /*zName*/, int /*syncDir*/)
226 {
227  return SQLITE_IOERR_DELETE;
228 }
229 
230 ////////////////////////////////////////////////////////////////////////////
231 /// Access control always allows read-only access to databases
232 int VfsRdOnlyAccess(sqlite3_vfs * /*vfs*/, const char * /*zPath*/, int flags, int *pResOut)
233 {
234  *pResOut = 0;
235  if (flags == SQLITE_ACCESS_READWRITE) {
236  return SQLITE_OPEN_READONLY;
237  }
238  return SQLITE_OK;
239 }
240 
241 ////////////////////////////////////////////////////////////////////////////
242 /// No distinction between relative and full paths for URLs, returns the input path name
243 int VfsRdOnlyFullPathname(sqlite3_vfs * /*vfs*/, const char *zPath, int nOut, char *zOut)
244 {
245  zOut[nOut - 1] = '\0';
246  sqlite3_snprintf(nOut, zOut, "%s", zPath);
247  return SQLITE_OK;
248 }
249 
250 ////////////////////////////////////////////////////////////////////////////
251 /// Let TRandom fill the buffer with random bytes
252 int VfsRdOnlyRandomness(sqlite3_vfs * /*vfs*/, int nBuf, char *zBuf)
253 {
254  for (int i = 0; i < nBuf; ++i) {
255  zBuf[i] = (char)gRandom->Integer(256);
256  }
257  return nBuf;
258 }
259 
260 ////////////////////////////////////////////////////////////////////////////
261 /// Use ROOT's platform independent sleep wrapper
262 int VfsRdOnlySleep(sqlite3_vfs * /*vfs*/, int microseconds)
263 {
264  // Millisecond precision but sleep at least number of given microseconds as requested
265  gSystem->Sleep((microseconds + 1000 - 1) / 1000);
266  return microseconds;
267 }
268 
269 ////////////////////////////////////////////////////////////////////////////
270 /// Use sqlite default implementation
271 int VfsRdOnlyGetLastError(sqlite3_vfs * /*vfs*/, int /*not_used1*/, char * /*not_used2*/)
272 {
273  return errno;
274 }
275 
276 ////////////////////////////////////////////////////////////////////////////
277 /// Return UTC as being done in the sqlite unix VFS without gettimeofday()
278 int VfsRdOnlyCurrentTimeInt64(sqlite3_vfs * /*vfs*/, sqlite3_int64 *piNow)
279 {
280  static constexpr sqlite3_int64 unixEpoch = 24405875 * (sqlite3_int64)8640000;
281  time_t t;
282  time(&t);
283  *piNow = ((sqlite3_int64)t) * 1000 + unixEpoch;
284  return SQLITE_OK;
285 }
286 
287 ////////////////////////////////////////////////////////////////////////////
288 /// Wrapper around VfsRdOnlyCurrentTimeInt64
289 int VfsRdOnlyCurrentTime(sqlite3_vfs *vfs, double *prNow)
290 {
291  sqlite3_int64 i = 0;
292  int rc = VfsRdOnlyCurrentTimeInt64(vfs, &i);
293  *prNow = i / 86400000.0;
294  return rc;
295 }
296 
297 ////////////////////////////////////////////////////////////////////////////
298 /// Set the function pointers of the VFS implementation in a
299 /// forward-compatible way
300 static sqlite3_vfs GetSqlite3Vfs()
301 {
302  // The C style initialization is compatible with version 1 and later versions of the struct.
303  // Version 1 was introduced with sqlite 3.5, version 2 with sqlite 3.7, version 3 with sqlite 3.7.6
304  sqlite3_vfs vfs;
305  memset(&vfs, 0, sizeof(vfs));
306  vfs.iVersion = 1;
307  vfs.szOsFile = sizeof(VfsRootFile);
308  vfs.mxPathname = 2000;
309  vfs.zName = gSQliteVfsName;
310  vfs.xOpen = VfsRdOnlyOpen;
311  vfs.xDelete = VfsRdOnlyDelete;
312  vfs.xAccess = VfsRdOnlyAccess;
313  vfs.xFullPathname = VfsRdOnlyFullPathname;
314  vfs.xRandomness = VfsRdOnlyRandomness;
315  vfs.xSleep = VfsRdOnlySleep;
316  vfs.xCurrentTime = VfsRdOnlyCurrentTime;
317  vfs.xGetLastError = VfsRdOnlyGetLastError;
318  return vfs;
319 }
320 
321 ////////////////////////////////////////////////////////////////////////////
322 /// A global struct of function pointers and details on the VfsRootFile class that together constitue a VFS module
323 static struct sqlite3_vfs kSqlite3Vfs = GetSqlite3Vfs();
324 
325 static bool RegisterSqliteVfs()
326 {
327  int retval;
328  retval = sqlite3_vfs_register(&kSqlite3Vfs, false);
329  return (retval == SQLITE_OK);
330 }
331 
332 } // anonymous namespace
333 
334 namespace ROOT {
335 
336 namespace RDF {
337 
338 namespace Internal {
339 ////////////////////////////////////////////////////////////////////////////
340 /// The state of an open dataset in terms of the sqlite3 C library.
341 struct RSqliteDSDataSet {
342  sqlite3 *fDb = nullptr;
343  sqlite3_stmt *fQuery = nullptr;
344 };
345 }
346 
348  : fType(type), fIsActive(false), fInteger(0), fReal(0.0), fText(), fBlob(), fNull(nullptr)
349 {
350  switch (type) {
351  case ETypes::kInteger: fPtr = &fInteger; break;
352  case ETypes::kReal: fPtr = &fReal; break;
353  case ETypes::kText: fPtr = &fText; break;
354  case ETypes::kBlob: fPtr = &fBlob; break;
355  case ETypes::kNull: fPtr = &fNull; break;
356  default: throw std::runtime_error("Internal error");
357  }
358 }
359 
360 constexpr char const *RSqliteDS::fgTypeNames[];
361 
362 ////////////////////////////////////////////////////////////////////////////
363 /// \brief Build the dataframe
364 /// \param[in] fileName The path to an sqlite3 file, will be opened read-only
365 /// \param[in] query A valid sqlite3 SELECT query
366 ///
367 /// The constructor opens the sqlite file, prepares the query engine and determines the column names and types.
368 RSqliteDS::RSqliteDS(const std::string &fileName, const std::string &query)
369  : fDataSet(std::make_unique<Internal::RSqliteDSDataSet>()), fNSlots(0), fNRow(0)
370 {
371  static bool hasSqliteVfs = RegisterSqliteVfs();
372  if (!hasSqliteVfs)
373  throw std::runtime_error("Cannot register SQlite VFS in RSqliteDS");
374 
375  int retval;
376 
377  retval = sqlite3_open_v2(fileName.c_str(), &fDataSet->fDb, SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX,
378  gSQliteVfsName);
379  if (retval != SQLITE_OK)
380  SqliteError(retval);
381 
382  retval = sqlite3_prepare_v2(fDataSet->fDb, query.c_str(), -1, &fDataSet->fQuery, nullptr);
383  if (retval != SQLITE_OK)
384  SqliteError(retval);
385 
386  int colCount = sqlite3_column_count(fDataSet->fQuery);
387  retval = sqlite3_step(fDataSet->fQuery);
388  if ((retval != SQLITE_ROW) && (retval != SQLITE_DONE))
389  SqliteError(retval);
390 
391  fValues.reserve(colCount);
392  for (int i = 0; i < colCount; ++i) {
393  fColumnNames.emplace_back(sqlite3_column_name(fDataSet->fQuery, i));
394  int type = SQLITE_NULL;
395  // Try first with the declared column type and then with the dynamic type
396  // for expressions
397  const char *declTypeCstr = sqlite3_column_decltype(fDataSet->fQuery, i);
398  if (declTypeCstr == nullptr) {
399  if (retval == SQLITE_ROW)
400  type = sqlite3_column_type(fDataSet->fQuery, i);
401  } else {
402  std::string declType(declTypeCstr);
403  std::transform(declType.begin(), declType.end(), declType.begin(), ::toupper);
404  if (declType == "INTEGER")
405  type = SQLITE_INTEGER;
406  else if (declType == "FLOAT")
407  type = SQLITE_FLOAT;
408  else if (declType == "TEXT")
409  type = SQLITE_TEXT;
410  else if (declType == "BLOB")
411  type = SQLITE_BLOB;
412  else
413  throw std::runtime_error("Unexpected column decl type");
414  }
415 
416  switch (type) {
417  case SQLITE_INTEGER:
418  fColumnTypes.push_back(ETypes::kInteger);
419  fValues.emplace_back(ETypes::kInteger);
420  break;
421  case SQLITE_FLOAT:
422  fColumnTypes.push_back(ETypes::kReal);
423  fValues.emplace_back(ETypes::kReal);
424  break;
425  case SQLITE_TEXT:
426  fColumnTypes.push_back(ETypes::kText);
427  fValues.emplace_back(ETypes::kText);
428  break;
429  case SQLITE_BLOB:
430  fColumnTypes.push_back(ETypes::kBlob);
431  fValues.emplace_back(ETypes::kBlob);
432  break;
433  case SQLITE_NULL:
434  // TODO: Null values in first rows are not well handled
435  fColumnTypes.push_back(ETypes::kNull);
436  fValues.emplace_back(ETypes::kNull);
437  break;
438  default: throw std::runtime_error("Unhandled data type");
439  }
440  }
441 }
442 
443 ////////////////////////////////////////////////////////////////////////////
444 /// Frees the sqlite resources and closes the file.
446 {
447  // sqlite3_finalize returns the error code of the most recent operation on fQuery.
448  sqlite3_finalize(fDataSet->fQuery);
449  // Closing can possibly fail with SQLITE_BUSY, in which case resources are leaked. This should not happen
450  // the way it is used in this class because we cleanup the prepared statement before.
451  sqlite3_close(fDataSet->fDb);
452 }
453 
454 ////////////////////////////////////////////////////////////////////////////
455 /// Returns the SELECT queries names. The column names have been cached in the constructor.
456 /// For expressions, the column name is the string of the expression unless the query defines a column name with as
457 /// like in "SELECT 1 + 1 as mycolumn FROM table"
458 const std::vector<std::string> &RSqliteDS::GetColumnNames() const
459 {
460  return fColumnNames;
461 }
462 
463 ////////////////////////////////////////////////////////////////////////////
464 /// Activates the given column's result value.
466 {
467  const auto index = std::distance(fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), name));
468  const auto type = fColumnTypes[index];
469 
470  if ((type == ETypes::kInteger && typeid(Long64_t) != ti) || (type == ETypes::kReal && typeid(double) != ti) ||
471  (type == ETypes::kText && typeid(std::string) != ti) ||
472  (type == ETypes::kBlob && typeid(std::vector<unsigned char>) != ti) ||
473  (type == ETypes::kNull && typeid(void *) != ti)) {
474  std::string errmsg = "The type selected for column \"";
475  errmsg += name;
476  errmsg += "\" does not correspond to column type, which is ";
477  errmsg += GetTypeName(name);
478  throw std::runtime_error(errmsg);
479  }
480 
481  fValues[index].fIsActive = true;
482  return std::vector<void *>{fNSlots, &fValues[index].fPtr};
483 }
484 
485 ////////////////////////////////////////////////////////////////////////////
486 /// Returns a range of size 1 as long as more rows are available in the SQL result set.
487 /// This inherently serialized the RDF independent of the number of slots.
488 std::vector<std::pair<ULong64_t, ULong64_t>> RSqliteDS::GetEntryRanges()
489 {
490  std::vector<std::pair<ULong64_t, ULong64_t>> entryRanges;
491  int retval = sqlite3_step(fDataSet->fQuery);
492  switch (retval) {
493  case SQLITE_DONE: return entryRanges;
494  case SQLITE_ROW:
495  entryRanges.emplace_back(fNRow, fNRow + 1);
496  fNRow++;
497  return entryRanges;
498  default:
499  SqliteError(retval);
500  // Never here
501  abort();
502  }
503 }
504 
505 ////////////////////////////////////////////////////////////////////////////
506 /// Returns the C++ type for a given column name, implemented as a linear search through all the columns.
507 std::string RSqliteDS::GetTypeName(std::string_view colName) const
508 {
509  unsigned N = fColumnNames.size();
510 
511  for (unsigned i = 0; i < N; ++i) {
512  if (colName == fColumnNames[i]) {
513  return fgTypeNames[static_cast<int>(fColumnTypes[i])];
514  }
515  }
516  throw std::runtime_error("Unknown column: " + std::string(colName));
517 }
518 
519 ////////////////////////////////////////////////////////////////////////////
520 /// A linear search through the columns for the given name
522 {
523  return std::find(fColumnNames.begin(), fColumnNames.end(), colName) != fColumnNames.end();
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////
527 /// Resets the SQlite query engine at the beginning of the event loop.
529 {
530  fNRow = 0;
531  int retval = sqlite3_reset(fDataSet->fQuery);
532  if (retval != SQLITE_OK)
533  throw std::runtime_error("SQlite error, reset");
534 }
535 
536 std::string RSqliteDS::GetLabel()
537 {
538  return "RSqliteDS";
539 }
540 
541 ////////////////////////////////////////////////////////////////////////////////////////////////
542 /// \brief Factory method to create a SQlite RDataFrame.
543 /// \param[in] fileName Path of the sqlite file.
544 /// \param[in] query SQL query that defines the data set.
546 {
547  ROOT::RDataFrame rdf(std::make_unique<RSqliteDS>(std::string(fileName), std::string(query)));
548  return rdf;
549 }
550 
551 ////////////////////////////////////////////////////////////////////////////
552 /// Stores the result of the current active sqlite query row as a C++ value.
553 bool RSqliteDS::SetEntry(unsigned int /* slot */, ULong64_t entry)
554 {
555  R__ASSERT(entry + 1 == fNRow);
556  unsigned N = fValues.size();
557  for (unsigned i = 0; i < N; ++i) {
558  if (!fValues[i].fIsActive)
559  continue;
560 
561  int nbytes;
562  switch (fValues[i].fType) {
563  case ETypes::kInteger: fValues[i].fInteger = sqlite3_column_int64(fDataSet->fQuery, i); break;
564  case ETypes::kReal: fValues[i].fReal = sqlite3_column_double(fDataSet->fQuery, i); break;
565  case ETypes::kText:
566  nbytes = sqlite3_column_bytes(fDataSet->fQuery, i);
567  if (nbytes == 0) {
568  fValues[i].fText = "";
569  } else {
570  fValues[i].fText = reinterpret_cast<const char *>(sqlite3_column_text(fDataSet->fQuery, i));
571  }
572  break;
573  case ETypes::kBlob:
574  nbytes = sqlite3_column_bytes(fDataSet->fQuery, i);
575  fValues[i].fBlob.resize(nbytes);
576  if (nbytes > 0) {
577  std::memcpy(fValues[i].fBlob.data(), sqlite3_column_blob(fDataSet->fQuery, i), nbytes);
578  }
579  break;
580  case ETypes::kNull: break;
581  default: throw std::runtime_error("Unhandled column type");
582  }
583  }
584  return true;
585 }
586 
587 ////////////////////////////////////////////////////////////////////////////////////////////////
588 /// Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
589 void RSqliteDS::SetNSlots(unsigned int nSlots)
590 {
591  if (nSlots > 1) {
592  ::Warning("SetNSlots", "Currently the SQlite data source faces performance degradation in multi-threaded mode. "
593  "Consider turning off IMT.");
594  }
595  fNSlots = nSlots;
596 }
597 
598 ////////////////////////////////////////////////////////////////////////////////////////////////
599 /// Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
600 void RSqliteDS::SqliteError(int errcode)
601 {
602  std::string errmsg = "SQlite error: ";
603 #if SQLITE_VERSION_NUMBER < 3007015
604  errmsg += std::to_string(errcode);
605 #else
606  errmsg += sqlite3_errstr(errcode);
607 #endif
608  throw std::runtime_error(errmsg);
609 }
610 
611 } // namespace RDF
612 
613 } // namespace ROOT
ROOT::RDF::RSqliteDS::Value_t::fPtr
void * fPtr
Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
Definition: RSqliteDS.hxx:88
ROOT::Internal::RRawFile::Create
static std::unique_ptr< RRawFile > Create(std::string_view url, ROptions options=ROptions())
Factory method that returns a suitable concrete implementation according to the transport in the url.
Definition: RRawFile.cxx:73
ROOT::RDF::RSqliteDS::ETypes::kInteger
@ kInteger
Warning
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition: TError.cxx:231
ROOT::RDF::RSqliteDS::fDataSet
std::unique_ptr< Internal::RSqliteDSDataSet > fDataSet
Definition: RSqliteDS.hxx:93
ROOT::RDF::RSqliteDS::SetEntry
bool SetEntry(unsigned int slot, ULong64_t entry) final
Stores the result of the current active sqlite query row as a C++ value.
Definition: RSqliteDS.cxx:553
ROOT::RDF::RSqliteDS::ETypes::kNull
@ kNull
ROOT::RDF::RSqliteDS::ETypes::kText
@ kText
ROOT::RDF::RSqliteDS::fNSlots
unsigned int fNSlots
Definition: RSqliteDS.hxx:94
Long64_t
long long Long64_t
Definition: RtypesCore.h:73
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::RDF::RSqliteDS::Value_t::fReal
double fReal
Definition: RSqliteDS.hxx:84
ROOT::RDF::RSqliteDS::GetColumnReadersImpl
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
Activates the given column's result value.
Definition: RSqliteDS.cxx:465
ROOT::RDF::MakeSqliteDataFrame
RDataFrame MakeSqliteDataFrame(std::string_view fileName, std::string_view query)
Factory method to create a SQlite RDataFrame.
Definition: RSqliteDS.cxx:545
Utils.hxx
TRandom.h
N
#define N
ROOT::RDF::RSqliteDS::Value_t::fInteger
Long64_t fInteger
Definition: RSqliteDS.hxx:83
ROOT::RDF::RSqliteDS::ETypes::kReal
@ kReal
RConfig.hxx
RSqliteDS.hxx
ROOT::RDataFrame
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
ROOT::RDF::RSqliteDS::ETypes::kBlob
@ kBlob
ROOT::RDF::RSqliteDS::fColumnTypes
std::vector< ETypes > fColumnTypes
Definition: RSqliteDS.hxx:97
ROOT::RDF::RSqliteDS::RSqliteDS
RSqliteDS(const std::string &fileName, const std::string &query)
Build the dataframe.
Definition: RSqliteDS.cxx:368
TSystem.h
RRawFile.hxx
gRandom
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
ROOT::RDF::RSqliteDS::ETypes
ETypes
All the types known to SQlite. Changes require changing fgTypeNames, too.
Definition: RSqliteDS.hxx:68
ROOT::RDF::RSqliteDS::GetEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Returns a range of size 1 as long as more rows are available in the SQL result set.
Definition: RSqliteDS.cxx:488
ROOT::RDF::RSqliteDS::SetNSlots
void SetNSlots(unsigned int nSlots) final
Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
Definition: RSqliteDS.cxx:589
ROOT::RDF::RSqliteDS::Initialise
void Initialise() final
Resets the SQlite query engine at the beginning of the event loop.
Definition: RSqliteDS.cxx:528
ROOT::RDF::RSqliteDS::Value_t::fNull
void * fNull
Definition: RSqliteDS.hxx:87
TSystem::Sleep
virtual void Sleep(UInt_t milliSec)
Sleep milliSec milli seconds.
Definition: TSystem.cxx:438
TRandom::Integer
virtual UInt_t Integer(UInt_t imax)
Returns a random integer uniformly distributed on the interval [ 0, imax-1 ].
Definition: TRandom.cxx:349
ROOT::RDF::RSqliteDS::fgTypeNames
static constexpr const char * fgTypeNames[]
Corresponds to the types defined in ETypes.
Definition: RSqliteDS.hxx:103
gSystem
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
ROOT::RDF::RSqliteDS::GetLabel
std::string GetLabel() final
Return a string representation of the datasource type.
Definition: RSqliteDS.cxx:536
ROOT::RDF::RSqliteDS::HasColumn
bool HasColumn(std::string_view colName) const final
A linear search through the columns for the given name.
Definition: RSqliteDS.cxx:521
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:74
ROOT::RDF::RDataSource::Record_t
std::vector< void * > Record_t
Definition: RDataSource.hxx:108
ROOT::RDF::RSqliteDS::fNRow
ULong64_t fNRow
Definition: RSqliteDS.hxx:95
ROOT::RDF::RSqliteDS::GetTypeName
std::string GetTypeName(std::string_view colName) const final
Returns the C++ type for a given column name, implemented as a linear search through all the columns.
Definition: RSqliteDS.cxx:507
R__ASSERT
#define R__ASSERT(e)
Definition: TError.h:120
ROOT::RDF::RSqliteDS::~RSqliteDS
~RSqliteDS()
Frees the sqlite resources and closes the file.
Definition: RSqliteDS.cxx:445
name
char name[80]
Definition: TGX11.cxx:110
ROOT::RDF::RSqliteDS::GetColumnNames
const std::vector< std::string > & GetColumnNames() const final
Returns the SELECT queries names.
Definition: RSqliteDS.cxx:458
RMakeUnique.hxx
ROOT::RDF::RSqliteDS::SqliteError
void SqliteError(int errcode)
Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
Definition: RSqliteDS.cxx:600
type
int type
Definition: TGX11.cxx:121
ROOT::RDF::RSqliteDS::fValues
std::vector< Value_t > fValues
The data source is inherently single-threaded and returns only one row at a time. This vector holds t...
Definition: RSqliteDS.hxx:99
ROOT::Internal::RRawFile::kFeatureHasSize
static constexpr int kFeatureHasSize
GetSize() does not return kUnknownFileSize.
Definition: RRawFile.hxx:61
ROOT::RDF::RSqliteDS::fColumnNames
std::vector< std::string > fColumnNames
Definition: RSqliteDS.hxx:96
ROOT::RDF::RSqliteDS::Value_t::Value_t
Value_t(ETypes type)
Definition: RSqliteDS.cxx:347
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::RDF::RSqliteDS::Value_t::fBlob
std::vector< unsigned char > fBlob
Definition: RSqliteDS.hxx:86
Error
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition: TError.cxx:187
TError.h
ROOT::RDF::RSqliteDS::Value_t::fText
std::string fText
Definition: RSqliteDS.hxx:85