Logo ROOT  
Reference Guide
RArrowDS.hxx
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
3  * All rights reserved. *
4  * *
5  * For the licensing terms see $ROOTSYS/LICENSE. *
6  * For the list of contributors see $ROOTSYS/README/CREDITS. *
7  *************************************************************************/
8 
9 #ifndef ROOT_RARROWTDS
10 #define ROOT_RARROWTDS
11 
12 #include "ROOT/RDataFrame.hxx"
13 #include "ROOT/RDataSource.hxx"
14 
15 #include <memory>
16 
17 namespace arrow {
18 class Table; // Forward declaration only: this header uses arrow::Table solely through std::shared_ptr, so the full definition is not needed here.
19 }
20 
21 namespace ROOT {
22 namespace Internal {
23 namespace RDF {
24 class TValueGetter; // Forward declaration. Helper class which keeps track, for each slot, of where to get the entry; defined in RArrowDS.cxx.
25 } // namespace RDF
26 } // namespace Internal
27 
28 namespace RDF {
29 
30 class RArrowDS final : public RDataSource { // RDataFrame data source class to interface with Apache Arrow.
31 private:
32  std::shared_ptr<arrow::Table> fTable; ///< Shared handle to the Arrow table (presumably the one passed to the constructor -- confirm in RArrowDS.cxx).
33  std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges; ///< Pairs of entry numbers; presumably the ranges handed out by GetEntryRanges() -- confirm in RArrowDS.cxx.
34  std::vector<std::string> fColumnNames; ///< Column names; presumably what GetColumnNames() returns a reference to -- confirm in RArrowDS.cxx.
35  size_t fNSlots = 0U; ///< Number of processing slots; zero until set (presumably by SetNSlots()).
36 
37  std::vector<std::pair<size_t, size_t>> fGetterIndex; // (columnId, visitorId)
38  std::vector<std::unique_ptr<ROOT::Internal::RDF::TValueGetter>> fValueGetters; // Visitors to be used to track and get entries. One per column.
39  std::vector<void *> GetColumnReadersImpl(std::string_view name, const std::type_info &type) override; ///< Needs to return a pointer to the pointer each value getter will point to.
40 
41 public:
42  RArrowDS(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns); ///< Constructor to create an Arrow RDataSource for RDataFrame.
43  ~RArrowDS(); ///< Destructor. Only declared here: TValueGetter is incomplete in this header, and destroying fValueGetters requires the complete type, which lives in RArrowDS.cxx.
44  const std::vector<std::string> &GetColumnNames() const override; ///< Returns a reference to the collection of the dataset's column names.
45  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override; ///< Return ranges of entries to distribute to tasks.
46  std::string GetTypeName(std::string_view colName) const override; ///< Type of a column as a string.
47  bool HasColumn(std::string_view colName) const override; ///< Checks if the dataset has a certain column.
48  bool SetEntry(unsigned int slot, ULong64_t entry) override; ///< Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
49  void InitSlot(unsigned int slot, ULong64_t firstEntry) override; ///< Convenience method called at the start of the data processing associated with a slot.
50  void SetNSlots(unsigned int nSlots) override; ///< Inform RDataSource of the number of processing slots.
51  void Initialise() override; ///< Convenience method called before starting an event-loop.
52  std::string GetLabel() override; ///< Return a string representation of the datasource type.
53 };
54 
55 ////////////////////////////////////////////////////////////////////////////////////////////////
56 /// \brief Factory method to create an Apache Arrow RDataFrame.
57 /// \param[in] table an arrow::Table (Apache Arrow table) to use as a source.
58 RDataFrame MakeArrowDataFrame(std::shared_ptr<arrow::Table> table, std::vector<std::string> const &columns); // NOTE(review): `columns` has no \param entry; presumably the names of the table columns to expose -- confirm against RArrowDS.cxx.
59 
60 } // namespace RDF
61 
62 } // namespace ROOT
63 
64 #endif
ROOT::RDF::RArrowDS::fColumnNames
std::vector< std::string > fColumnNames
Definition: RArrowDS.hxx:34
ROOT::RDF::RArrowDS::HasColumn
bool HasColumn(std::string_view colName) const override
Checks if the dataset has a certain column.
Definition: RArrowDS.cxx:493
ROOT::Internal::RDF::TValueGetter
Helper class which keeps track, for each slot, of where to get the entry.
Definition: RArrowDS.cxx:204
ROOT::RDF::RArrowDS
RDataFrame data source class to interface with Apache Arrow.
Definition: RArrowDS.hxx:30
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:786
ROOT::RDF::RArrowDS::GetColumnReadersImpl
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) override
This needs to return a pointer to the pointer each value getter will point to.
Definition: RArrowDS.cxx:570
ROOT::RDataFrame
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:42
ROOT::RDF::RDataSource
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:106
ROOT::RDF::RArrowDS::GetTypeName
std::string GetTypeName(std::string_view colName) const override
Type of a column as a string, e.g. GetTypeName("x") == "double".
Definition: RArrowDS.cxx:475
RDataFrame.hxx
ROOT::RDF::RArrowDS::RArrowDS
RArrowDS(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Constructor to create an Arrow RDataSource for RDataFrame.
Definition: RArrowDS.cxx:388
RDataSource.hxx
ROOT::RDF::MakeArrowDataFrame
RDataFrame MakeArrowDataFrame(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Factory method to create an Apache Arrow RDataFrame.
Definition: RArrowDS.cxx:604
ROOT::RDF::RArrowDS::SetEntry
bool SetEntry(unsigned int slot, ULong64_t entry) override
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RArrowDS.cxx:502
ROOT::RDF::RArrowDS::InitSlot
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
Convenience method called at the start of the data processing associated with a slot.
Definition: RArrowDS.cxx:511
ROOT::RDF::RArrowDS::fEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RArrowDS.hxx:33
ROOT::RDF::RArrowDS::fValueGetters
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
Definition: RArrowDS.hxx:38
ROOT::RDF::RArrowDS::fNSlots
size_t fNSlots
Definition: RArrowDS.hxx:35
ROOT::RDF::RArrowDS::GetLabel
std::string GetLabel() override
Return a string representation of the datasource type.
Definition: RArrowDS.cxx:595
ROOT::RDF::RArrowDS::Initialise
void Initialise() override
Convenience method called before starting an event-loop.
Definition: RArrowDS.cxx:589
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:81
ROOT::RDF::RArrowDS::~RArrowDS
~RArrowDS()
Destructor.
Definition: RArrowDS.cxx:460
ROOT::RDF::RArrowDS::GetColumnNames
const std::vector< std::string > & GetColumnNames() const override
Returns a reference to the collection of the dataset's column names.
Definition: RArrowDS.cxx:464
arrow
Definition: RArrowDS.hxx:17
name
char name[80]
Definition: TGX11.cxx:110
ROOT::RDF::RArrowDS::GetEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() override
Return ranges of entries to distribute to tasks.
Definition: RArrowDS.cxx:469
ROOT::RDF::RArrowDS::SetNSlots
void SetNSlots(unsigned int nSlots) override
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition: RArrowDS.cxx:554
type
int type
Definition: TGX11.cxx:121
ROOT::RDF::RArrowDS::fTable
std::shared_ptr< arrow::Table > fTable
Definition: RArrowDS.hxx:32
ROOT::RDF::RArrowDS::fGetterIndex
std::vector< std::pair< size_t, size_t > > fGetterIndex
Definition: RArrowDS.hxx:37
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tbb::task_arena without forward-declaring tbb::interface7.
Definition: EExecutionPolicy.hxx:4