40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wshadow"
42#pragma GCC diagnostic ignored "-Wunused-parameter"
44#include <arrow/table.h>
47#pragma GCC diagnostic pop
62#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_) \
64 struct RootConversionTraits<c_type> { \
65 using ArrowType = ::arrow::ArrowType_; \
86 bool fCachedBool{
false};
102 using ArrayType =
typename arrow::TypeTraits<ArrowType>::ArrayType;
103 auto values =
reinterpret_cast<ArrayType *
>(array.values().get());
107 RVec<T> tmp(
reinterpret_cast<T *
>((
void *)values->raw_values()) +
offset, array.value_length(
entry));
108 std::swap(cache, tmp);
109 return (
void *)(&cache);
118 arrow::Status
Visit(arrow::Int32Array
const &array)
final
120 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
121 return arrow::Status::OK();
124 arrow::Status
Visit(arrow::Int64Array
const &array)
final
126 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
127 return arrow::Status::OK();
131 arrow::Status
Visit(arrow::UInt32Array
const &array)
final
133 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
134 return arrow::Status::OK();
137 arrow::Status
Visit(arrow::UInt64Array
const &array)
final
139 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
140 return arrow::Status::OK();
143 arrow::Status
Visit(arrow::FloatArray
const &array)
final
145 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
146 return arrow::Status::OK();
149 arrow::Status
Visit(arrow::DoubleArray
const &array)
final
151 *fResult = (
void *)(array.raw_values() + fCurrentEntry);
152 return arrow::Status::OK();
155 arrow::Status
Visit(arrow::BooleanArray
const &array)
final
157 fCachedBool = array.Value(fCurrentEntry);
158 *fResult =
reinterpret_cast<void *
>(&fCachedBool);
159 return arrow::Status::OK();
162 arrow::Status
Visit(arrow::StringArray
const &array)
final
164 fCachedString = array.GetString(fCurrentEntry);
165 *fResult =
reinterpret_cast<void *
>(&fCachedString);
166 return arrow::Status::OK();
169 arrow::Status
Visit(arrow::ListArray
const &array)
final
171 switch (array.value_type()->id()) {
172 case arrow::Type::FLOAT: {
173 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecFloat);
174 return arrow::Status::OK();
176 case arrow::Type::DOUBLE: {
177 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecDouble);
178 return arrow::Status::OK();
180 case arrow::Type::UINT32: {
181 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecUInt);
182 return arrow::Status::OK();
184 case arrow::Type::UINT64: {
185 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecULong64);
186 return arrow::Status::OK();
188 case arrow::Type::INT32: {
189 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecInt);
190 return arrow::Status::OK();
192 case arrow::Type::INT64: {
193 *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecLong64);
194 return arrow::Status::OK();
196 default:
return arrow::Status::TypeError(
"Type not supported");
200 using ::arrow::ArrayVisitor::Visit;
225 next +=
chunk->length();
236 std::vector<void *>
result;
272 std::string
msg =
"Could not get pointer for slot ";
273 msg += std::to_string(
slot) +
" looking at entry " + std::to_string(
entry);
274 throw std::runtime_error(
msg);
302 arrow::Status
Visit(
const arrow::Int64Type &)
override
305 return arrow::Status::OK();
307 arrow::Status
Visit(
const arrow::Int32Type &)
override
310 return arrow::Status::OK();
312 arrow::Status
Visit(
const arrow::UInt64Type &)
override
315 return arrow::Status::OK();
317 arrow::Status
Visit(
const arrow::UInt32Type &)
override
320 return arrow::Status::OK();
322 arrow::Status
Visit(
const arrow::FloatType &)
override
325 return arrow::Status::OK();
327 arrow::Status
Visit(
const arrow::DoubleType &)
override
330 return arrow::Status::OK();
332 arrow::Status
Visit(
const arrow::StringType &)
override
335 return arrow::Status::OK();
337 arrow::Status
Visit(
const arrow::BooleanType &)
override
340 return arrow::Status::OK();
342 arrow::Status
Visit(
const arrow::ListType &
l)
override
348 fTypeName.push_back(
"ROOT::VecOps::RVec<%s>");
349 return l.value_type()->Accept(
this);
354 std::string
result =
"%s";
356 for (
size_t i = 0; i <
fTypeName.size(); ++i) {
363 using ::arrow::TypeVisitor::Visit;
370 arrow::Status
Visit(
const arrow::Int64Type &)
override {
return arrow::Status::OK(); }
371 arrow::Status
Visit(
const arrow::UInt64Type &)
override {
return arrow::Status::OK(); }
372 arrow::Status
Visit(
const arrow::Int32Type &)
override {
return arrow::Status::OK(); }
373 arrow::Status
Visit(
const arrow::UInt32Type &)
override {
return arrow::Status::OK(); }
374 arrow::Status
Visit(
const arrow::FloatType &)
override {
return arrow::Status::OK(); }
375 arrow::Status
Visit(
const arrow::DoubleType &)
override {
return arrow::Status::OK(); }
376 arrow::Status
Visit(
const arrow::StringType &)
override {
return arrow::Status::OK(); }
377 arrow::Status
Visit(
const arrow::BooleanType &)
override {
return arrow::Status::OK(); }
378 arrow::Status
Visit(
const arrow::ListType &)
override {
return arrow::Status::OK(); }
380 using ::arrow::TypeVisitor::Visit;
388RArrowDS::RArrowDS(std::shared_ptr<arrow::Table>
inTable, std::vector<std::string>
const &
inColumns)
398 for (
auto &
field : table->schema()->fields()) {
405 using ColumnType =
decltype(
fTable->column(0));
409 throw std::runtime_error(
"At least one column required");
413 return table->column(
columnIdx)->length();
419 std::string
msg =
"Column ";
420 msg += table->schema()->field(
columnIdx)->name() +
" has a different number of entries.";
421 throw std::runtime_error(
msg);
427 auto verifyType = std::make_unique<VerifyValidColumnType>();
429 if (
result.ok() ==
false) {
430 std::string
msg =
"Column ";
431 msg += table->schema()->field(
columnIdx)->name() +
" contains an unsupported type.";
432 throw std::runtime_error(
msg);
464const std::vector<std::string> &RArrowDS::GetColumnNames()
const
469std::vector<std::pair<ULong64_t, ULong64_t>> RArrowDS::GetEntryRanges()
475std::string RArrowDS::GetTypeName(std::string_view
colName)
const
479 std::string
msg =
"The dataset does not have column ";
481 throw std::runtime_error(
msg);
485 if (status.ok() ==
false) {
486 std::string
msg =
"RArrowDS does not support a column of type ";
488 throw std::runtime_error(
msg);
493bool RArrowDS::HasColumn(std::string_view
colName)
const
529 ranges.emplace_back(start, end);
532 ranges.back().second += remainder;
538 return table->column(
index)->length();
548std::shared_ptr<arrow::ChunkedArray>
554void RArrowDS::SetNSlots(
unsigned int nSlots)
556 assert(0U ==
fNSlots &&
"Setting the number of slots even if the number of slots is different from zero.");
570std::vector<void *> RArrowDS::GetColumnReadersImpl(std::string_view
colName,
const std::type_info &)
575 if (
entry.first == column) {
579 throw std::runtime_error(
"No column found at index " + std::to_string(column));
589void RArrowDS::Initialize()
595std::string RArrowDS::GetLabel()
#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_)
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
ULong64_t fCurrentEntry
The entry in the array which should be looked up.
std::string fCachedString
void ** fResult
The pointer to update.
arrow::Status Visit(arrow::DoubleArray const &array) final
arrow::Status Visit(arrow::UInt32Array const &array) final
Check if we are asking the same entry as before.
arrow::Status Visit(arrow::Int64Array const &array) final
arrow::Status Visit(arrow::BooleanArray const &array) final
arrow::Status Visit(arrow::Int32Array const &array) final
Check if we are asking the same entry as before.
arrow::Status Visit(arrow::UInt64Array const &array) final
RVec< UInt_t > fCachedRVecUInt
RVec< ULong64_t > fCachedRVecULong64
arrow::Status Visit(arrow::StringArray const &array) final
RVec< Int_t > fCachedRVecInt
void SetEntry(ULong64_t entry)
arrow::Status Visit(arrow::ListArray const &array) final
RVec< Long64_t > fCachedRVecLong64
RVec< float > fCachedRVecFloat
arrow::Status Visit(arrow::FloatArray const &array) final
void * getTypeErasedPtrFrom(arrow::ListArray const &array, int32_t entry, RVec< T > &cache)
ArrayPtrVisitor(void **result)
RVec< double > fCachedRVecDouble
Helper class which keeps track for each slot where to get the entry.
std::vector< ULong64_t > fLastChunkPerSlot
std::vector< ArrayPtrVisitor > fArrayVisitorPerSlot
std::vector< ULong64_t > fFirstEntryPerChunk
std::vector< ULong64_t > fLastEntryPerSlot
std::vector< void * > SlotPtrs()
This returns the ptr to the ptr to actual data.
TValueGetter(size_t slots, arrow::ArrayVector chunks)
arrow::ArrayVector fChunks
void SetEntry(unsigned int slot, ULong64_t entry)
Set the current entry to be retrieved.
std::vector< ULong64_t > fChunkIndex
Since data can be chunked in different arrays we need to construct an index which contains the first ...
void UncachedSlotLookup(unsigned int slot, ULong64_t entry)
std::vector< void * > fValuesPtrPerSlot
std::shared_ptr< arrow::Table > fTable
std::vector< std::pair< size_t, size_t > > fGetterIndex
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
std::vector< std::string > fColumnNames
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Helper to get the contents of a given column.
arrow::Status Visit(const arrow::DoubleType &) override
arrow::Status Visit(const arrow::BooleanType &) override
arrow::Status Visit(const arrow::UInt32Type &) override
arrow::Status Visit(const arrow::ListType &l) override
arrow::Status Visit(const arrow::FloatType &) override
arrow::Status Visit(const arrow::UInt64Type &) override
arrow::Status Visit(const arrow::StringType &) override
arrow::Status Visit(const arrow::Int64Type &) override
std::vector< std::string > fTypeName
arrow::Status Visit(const arrow::Int32Type &) override
Helper to determine if a given Column is a supported type.
arrow::Status Visit(const arrow::UInt64Type &) override
arrow::Status Visit(const arrow::FloatType &) override
arrow::Status Visit(const arrow::Int64Type &) override
arrow::Status Visit(const arrow::Int32Type &) override
arrow::Status Visit(const arrow::StringType &) override
arrow::Status Visit(const arrow::UInt32Type &) override
arrow::Status Visit(const arrow::DoubleType &) override
arrow::Status Visit(const arrow::BooleanType &) override
arrow::Status Visit(const arrow::ListType &) override
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
void splitInEqualRanges(std::vector< std::pair< ULong64_t, ULong64_t > > &ranges, int nRecords, unsigned int nSlots)
int getNRecords(std::shared_ptr< arrow::Table > &table, std::vector< std::string > &columnNames)
std::shared_ptr< arrow::ChunkedArray > getData(T p)
RDataFrame FromArrow(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columnNames)
Factory method to create an Apache Arrow RDataFrame.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
TSeq< unsigned int > TSeqU