Logo ROOT  
Reference Guide
RSqliteDS.hxx
Go to the documentation of this file.
1 // Author: Jakob Blomer CERN 07/2018
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RSQLITEDS
12 #define ROOT_RSQLITEDS
13 
14 #include "ROOT/RDataFrame.hxx"
15 #include "ROOT/RDataSource.hxx"
16 #include "ROOT/RStringView.hxx"
17 
18 #include <memory>
19 #include <mutex>
20 #include <string>
21 #include <vector>
22 
23 namespace ROOT {
24 
25 namespace RDF {
26 
27 namespace Internal {
28 // Members are defined in RSqliteDS.cxx in order not to pollute this header file with sqlite3.h
29 struct RSqliteDSDataSet;
30 }
31 
32 // clang-format off
33 /**
34 \class ROOT::RDF::RSqliteDS
35 \ingroup dataframe
36 \brief RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
37 
38 The RSqliteDS is able to feed an RDataFrame with data from an SQLite SELECT query. One can use it like this:
39 
40  auto rdf = ROOT::RDF::MakeSqliteDataFrame("/path/to/file.sqlite", "select name from table");
41  auto h = rdf.Define("lName", "name.length()").Histo1D("lName");
42 
43 The data source has to provide column types for all the columns. Determining column types in SQLite is tricky
44 as it is dynamically typed and in principle each row can have different column types. The following heuristics
45 are used:
46 
47  - If a table column is queried as is ("SELECT colname FROM table"), the default/declared column type is taken.
48  - For expressions ("SELECT 1+1 FROM table"), the type of the first row of the result set determines the column type.
49  That can result in a column being thought to be of type NULL even though subsequent rows actually have meaningful values.
50  The provided SELECT query can be written so as to avoid such ambiguities.
51 */
52 class RSqliteDS final : public ROOT::RDF::RDataSource {
53 private:
54  // clang-format off
55  /// All the types known to SQLite. Changes require changing fgTypeNames, too.
56  enum class ETypes {
57  kInteger,
58  kReal,
59  kText,
60  kBlob,
61  kNull
62  };
63  // clang-format on
64 
65  /// Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once available.
66  struct Value_t {
67  explicit Value_t(ETypes type);
68 
69  ETypes fType; ///< The ETypes tag of this cell.
70  bool fIsActive; ///< Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
71  Long64_t fInteger; ///< Integer storage; "Long64_t" is the fgTypeNames entry listed first, matching kInteger.
72  double fReal; ///< Floating-point storage; "double" is the fgTypeNames entry matching kReal.
73  std::string fText; ///< Text storage; "std::string" is the fgTypeNames entry matching kText.
74  std::vector<unsigned char> fBlob; ///< Binary storage; "std::vector<unsigned char>" is the fgTypeNames entry matching kBlob.
75  void *fNull; ///< Placeholder for NULL cells; "void *" is the fgTypeNames entry matching kNull.
76  void *fPtr; ///< Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
77  };
78 
79  void SqliteError(int errcode); ///< Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
80 
81  std::unique_ptr<Internal::RSqliteDSDataSet> fDataSet; ///< Opaque sqlite state; its members are defined in RSqliteDS.cxx.
82  unsigned int fNSlots;
83  ULong64_t fNRow; ///< Row counter; NOTE(review): presumably the index of the current result row, confirm in RSqliteDS.cxx.
84  std::vector<std::string> fColumnNames;
85  std::vector<ETypes> fColumnTypes;
86  /// The data source is inherently single-threaded and returns only one row at a time. This vector holds the results.
87  std::vector<Value_t> fValues;
88 
89  // clang-format off
90  /// Corresponds to the types defined in ETypes.
91  static constexpr char const *fgTypeNames[] = {
92  "Long64_t",
93  "double",
94  "std::string",
95  "std::vector<unsigned char>",
96  "void *"
97  };
98  // clang-format on
99 
100 public:
101  RSqliteDS(const std::string &fileName, const std::string &query); ///< Build the dataframe.
102  ~RSqliteDS(); ///< Frees the sqlite resources and closes the file.
103  void SetNSlots(unsigned int nSlots) final; ///< Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
104  const std::vector<std::string> &GetColumnNames() const final; ///< Returns the column names of the SELECT query.
105  bool HasColumn(std::string_view colName) const final; ///< A linear search through the columns for the given name.
106  std::string GetTypeName(std::string_view colName) const final; ///< Returns the C++ type for a given column name (linear search through all the columns).
107  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final; ///< Returns a range of size 1 as long as more rows are available in the SQL result set.
108  bool SetEntry(unsigned int slot, ULong64_t entry) final; ///< Stores the result of the current active sqlite query row as a C++ value.
109  void Initialise() final; ///< Resets the SQLite query engine at the beginning of the event loop.
110  std::string GetLabel() final; ///< Return a string representation of the datasource type.
111 
112 protected:
113  Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final; ///< Activates the given column's result value.
114 };
115 
116 RDataFrame MakeSqliteDataFrame(std::string_view fileName, std::string_view query); ///< Factory method to create a SQLite RDataFrame.
117 
118 } // namespace RDF
119 
120 } // namespace ROOT
121 
122 #endif
ROOT::RDF::RSqliteDS::Value_t::fPtr
void * fPtr
Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
Definition: RSqliteDS.hxx:88
ROOT::RDF::RSqliteDS::ETypes::kInteger
@ kInteger
ROOT::RDF::RSqliteDS::fDataSet
std::unique_ptr< Internal::RSqliteDSDataSet > fDataSet
Definition: RSqliteDS.hxx:93
ROOT::RDF::RSqliteDS::SetEntry
bool SetEntry(unsigned int slot, ULong64_t entry) final
Stores the result of the current active sqlite query row as a C++ value.
Definition: RSqliteDS.cxx:553
ROOT::RDF::RSqliteDS::ETypes::kNull
@ kNull
ROOT::RDF::RSqliteDS::ETypes::kText
@ kText
basic_string_view
Definition: libcpp_string_view.h:199
ROOT::RDF::RSqliteDS::fNSlots
unsigned int fNSlots
Definition: RSqliteDS.hxx:94
Long64_t
long long Long64_t
Definition: RtypesCore.h:73
ROOT::RDF::RSqliteDS::Value_t::fReal
double fReal
Definition: RSqliteDS.hxx:84
ROOT::RDF::RSqliteDS::GetColumnReadersImpl
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
Activates the given column's result value.
Definition: RSqliteDS.cxx:465
ROOT::RDF::MakeSqliteDataFrame
RDataFrame MakeSqliteDataFrame(std::string_view fileName, std::string_view query)
Factory method to create a SQlite RDataFrame.
Definition: RSqliteDS.cxx:545
ROOT::RDF::RSqliteDS::Value_t::fInteger
Long64_t fInteger
Definition: RSqliteDS.hxx:83
ROOT::RDF::RSqliteDS::ETypes::kReal
@ kReal
ROOT::RDataFrame
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
ROOT::RDF::RDataSource
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:105
RDataFrame.hxx
RDataSource.hxx
ROOT::RDF::RSqliteDS::ETypes::kBlob
@ kBlob
ROOT::RDF::RSqliteDS::fColumnTypes
std::vector< ETypes > fColumnTypes
Definition: RSqliteDS.hxx:97
ROOT::RDF::RSqliteDS::RSqliteDS
RSqliteDS(const std::string &fileName, const std::string &query)
Build the dataframe.
Definition: RSqliteDS.cxx:368
ROOT::RDF::RSqliteDS::Value_t::fType
ETypes fType
Definition: RSqliteDS.hxx:81
RStringView.hxx
ROOT::RDF::RSqliteDS::Value_t
Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once ...
Definition: RSqliteDS.hxx:78
ROOT::RDF::RSqliteDS::ETypes
ETypes
All the types known to SQlite. Changes require changing fgTypeNames, too.
Definition: RSqliteDS.hxx:68
ROOT::RDF::RSqliteDS::GetEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Returns a range of size 1 as long as more rows are available in the SQL result set.
Definition: RSqliteDS.cxx:488
ROOT::RDF::RSqliteDS::SetNSlots
void SetNSlots(unsigned int nSlots) final
Almost a no-op, many slots can in fact reduce the performance due to thread synchronization.
Definition: RSqliteDS.cxx:589
ROOT::RDF::RSqliteDS::Initialise
void Initialise() final
Resets the SQlite query engine at the beginning of the event loop.
Definition: RSqliteDS.cxx:528
ROOT::RDF::RSqliteDS::Value_t::fNull
void * fNull
Definition: RSqliteDS.hxx:87
ROOT::RDF::RSqliteDS::fgTypeNames
static constexpr const char * fgTypeNames[]
Corresponds to the types defined in ETypes.
Definition: RSqliteDS.hxx:103
ROOT::RDF::RSqliteDS::GetLabel
std::string GetLabel() final
Return a string representation of the datasource type.
Definition: RSqliteDS.cxx:536
ROOT::RDF::RSqliteDS::HasColumn
bool HasColumn(std::string_view colName) const final
A linear search through the columns for the given name.
Definition: RSqliteDS.cxx:521
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:74
ROOT::RDF::RDataSource::Record_t
std::vector< void * > Record_t
Definition: RDataSource.hxx:108
ROOT::RDF::RSqliteDS::fNRow
ULong64_t fNRow
Definition: RSqliteDS.hxx:95
ROOT::RDF::RSqliteDS::GetTypeName
std::string GetTypeName(std::string_view colName) const final
Returns the C++ type for a given column name, implemented as a linear search through all the columns.
Definition: RSqliteDS.cxx:507
ROOT::RDF::RSqliteDS::~RSqliteDS
~RSqliteDS()
Frees the sqlite resources and closes the file.
Definition: RSqliteDS.cxx:445
ROOT::RDF::RSqliteDS
RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
Definition: RSqliteDS.hxx:64
ROOT::RDF::RSqliteDS::Value_t::fIsActive
bool fIsActive
Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
Definition: RSqliteDS.hxx:82
name
char name[80]
Definition: TGX11.cxx:110
ROOT::RDF::RSqliteDS::GetColumnNames
const std::vector< std::string > & GetColumnNames() const final
Returns the column names of the SELECT query.
Definition: RSqliteDS.cxx:458
ROOT::RDF::RSqliteDS::SqliteError
void SqliteError(int errcode)
Helper function to throw an exception if there is a fatal sqlite error, e.g. an I/O error.
Definition: RSqliteDS.cxx:600
type
int type
Definition: TGX11.cxx:121
ROOT::RDF::RSqliteDS::fValues
std::vector< Value_t > fValues
The data source is inherently single-threaded and returns only one row at a time. This vector holds t...
Definition: RSqliteDS.hxx:99
ROOT::RDF::RSqliteDS::fColumnNames
std::vector< std::string > fColumnNames
Definition: RSqliteDS.hxx:96
ROOT::RDF::RSqliteDS::Value_t::Value_t
Value_t(ETypes type)
Definition: RSqliteDS.cxx:347
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::RDF::RSqliteDS::Value_t::fBlob
std::vector< unsigned char > fBlob
Definition: RSqliteDS.hxx:86
ROOT::RDF::RSqliteDS::Value_t::fText
std::string fText
Definition: RSqliteDS.hxx:85