Logo ROOT   6.21/01
Reference Guide
RArrowDS.hxx
Go to the documentation of this file.
1 #ifndef ROOT_RARROWTDS
2 #define ROOT_RARROWTDS
3 
4 #include "ROOT/RDataFrame.hxx"
5 #include "ROOT/RDataSource.hxx"
6 
7 #include <memory>
8 
9 namespace arrow {
10 class Table;
11 }
12 
13 namespace ROOT {
14 namespace Internal {
15 namespace RDF {
16 class TValueGetter;
17 } // namespace RDF
18 } // namespace Internal
19 
20 namespace RDF {
21 
22 class RArrowDS final : public RDataSource { // RDataFrame data source class to interface with Apache Arrow.
23 private:
24  std::shared_ptr<arrow::Table> fTable; // Shared handle to the arrow::Table (presumably the one given to the constructor -- confirm in RArrowDS.cxx).
25  std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges; // Entry ranges stored as pairs; presumably what GetEntryRanges() hands out -- confirm.
26  std::vector<std::string> fColumnNames; // Column names; presumably what GetColumnNames() exposes -- confirm.
27  size_t fNSlots = 0U; // Number of processing slots; starts at 0 (presumably updated by SetNSlots() -- confirm).
28 
29  std::vector<std::pair<size_t, size_t>> fGetterIndex; // (columnId, visitorId)
30  std::vector<std::unique_ptr<ROOT::Internal::RDF::TValueGetter>> fValueGetters; // Visitors to be used to track and get entries. One per column.
31  std::vector<void *> GetColumnReadersImpl(std::string_view name, const std::type_info &type) override; // Needs to return a pointer to the pointer each value getter will point to.
32 
33 public:
34  RArrowDS(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns); // Constructor to create an Arrow RDataSource for RDataFrame.
35  ~RArrowDS(); // Destructor.
36  const std::vector<std::string> &GetColumnNames() const override; // Returns a reference to the collection of the dataset's column names.
37  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override; // Return ranges of entries to distribute to tasks.
38  std::string GetTypeName(std::string_view colName) const override; // Type of a column as a string.
39  bool HasColumn(std::string_view colName) const override; // Checks if the dataset has a certain column.
40  bool SetEntry(unsigned int slot, ULong64_t entry) override; // Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
41  void InitSlot(unsigned int slot, ULong64_t firstEntry) override; // Convenience method called at the start of the data processing associated to a slot.
42  void SetNSlots(unsigned int nSlots) override; // Inform RDataSource of the number of processing slots.
43  void Initialise() override; // Convenience method called before starting an event-loop.
44  std::string GetLabel() override; // Return a string representation of the datasource type.
45 };
46 
47 ////////////////////////////////////////////////////////////////////////////////////////////////
48 /// \brief Factory method to create an Apache Arrow RDataFrame.
49 /// \param[in] table an Apache Arrow table (arrow::Table) to use as a source.
50 RDataFrame MakeArrowDataFrame(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns); // NOTE(review): `columns` is undocumented here -- presumably the names of the columns to expose; confirm against RArrowDS.cxx.
51 
52 } // namespace RDF
53 
54 } // namespace ROOT
55 
56 #endif
Returns the available number of logical cores.
Definition: StringConv.hxx:21
bool HasColumn(std::string_view colName) const override
Checks if the dataset has a certain column.
Definition: RArrowDS.cxx:489
RDataFrame data source class to interface with Apache Arrow.
Definition: RArrowDS.hxx:22
RDataFrame MakeArrowDataFrame(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Factory method to create an Apache Arrow RDataFrame.
Definition: RArrowDS.cxx:587
RArrowDS(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Constructor to create an Arrow RDataSource for RDataFrame.
Definition: RArrowDS.cxx:387
std::string GetTypeName(std::string_view colName) const override
Type of a column as a string.
Definition: RArrowDS.cxx:471
bool SetEntry(unsigned int slot, ULong64_t entry) override
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot...
Definition: RArrowDS.cxx:498
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) override
This needs to return a pointer to the pointer each value getter will point to.
Definition: RArrowDS.cxx:553
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RArrowDS.hxx:25
const std::vector< std::string > & GetColumnNames() const override
Returns a reference to the collection of the dataset's column names.
Definition: RArrowDS.cxx:460
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
Definition: RArrowDS.hxx:30
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
Convenience method called at the start of the data processing associated to a slot.
Definition: RArrowDS.cxx:507
void SetNSlots(unsigned int nSlots) override
Inform RDataSource of the number of processing slots.
Definition: RArrowDS.cxx:537
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees, CSVs and other data formats.
Definition: RDataFrame.hxx:42
~RArrowDS()
Destructor.
Definition: RArrowDS.cxx:456
int type
Definition: TGX11.cxx:120
unsigned long long ULong64_t
Definition: RtypesCore.h:70
std::string GetLabel() override
Return a string representation of the datasource type.
Definition: RArrowDS.cxx:578
std::shared_ptr< arrow::Table > fTable
Definition: RArrowDS.hxx:24
void Initialise() override
Convenience method called before starting an event-loop.
Definition: RArrowDS.cxx:572
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
char name[80]
Definition: TGX11.cxx:109
std::vector< std::string > fColumnNames
Definition: RArrowDS.hxx:26
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() override
Return ranges of entries to distribute to tasks.
Definition: RArrowDS.cxx:465
std::vector< std::pair< size_t, size_t > > fGetterIndex
Definition: RArrowDS.hxx:29