25#include <unordered_set> 
   42   const auto tree = fLoopManager->GetTree();
 
   44      const auto treeName = 
tree->GetName();
 
   45      const auto isTChain = 
dynamic_cast<TChain *
>(
tree) ? 
true : 
false;
 
   46      const auto treeType = isTChain ? 
"TChain" : 
"TTree";
 
   47      const auto isInMemory = !isTChain && !
tree->GetCurrentFile() ? true : 
false;
 
   49      const auto hasFriends = friendInfo.fFriendNames.empty() ? false : 
true;
 
   51      ss << 
"Dataframe from " << treeType << 
" " << treeName;
 
   56         const auto numFiles = files.size();
 
   58            ss << 
" in file " << files[0];
 
   61            for (
auto i = 0u; i < numFiles; i++) {
 
   62               ss << 
"  " << files[i];
 
   69         const auto numFriends = friendInfo.fFriendNames.size();
 
   70         if (numFriends == 1) {
 
   71            ss << 
"\nwith friend\n";
 
   73            ss << 
"\nwith friends\n";
 
   75         for (
auto i = 0u; i < numFriends; i++) {
 
   76            const auto nameAlias = friendInfo.fFriendNames[i];
 
   77            const auto files = friendInfo.fFriendFileNames[i];
 
   78            const auto numFiles = files.size();
 
   79            const auto subnames = friendInfo.fFriendChainSubNames[i];
 
   80            ss << 
"  " << nameAlias.first;
 
   81            if (nameAlias.first != nameAlias.second)
 
   82               ss << 
" (" << nameAlias.second << 
")";
 
   85               ss << 
" " << files[0];
 
   90               for (
auto j = 0u; j < numFiles; j++) {
 
   91                  ss << 
"    " << subnames[j] << 
" " << files[j];
 
   96            if (i < numFriends - 1)
 
  103   else if (fDataSource) {
 
  104      const auto datasourceLabel = fDataSource->GetLabel();
 
  105      return "Dataframe from datasource " + datasourceLabel;
 
  109      const auto n = fLoopManager->GetNEmptyEntries();
 
  111         return "Empty dataframe filling 1 row";
 
  113         return "Empty dataframe filling " + std::to_string(
n) + 
" rows";
 
  119   : fLoopManager(lm.get()), fDataSource(lm->GetDataSource()), fColRegister(std::move(lm))
 
  125   : fLoopManager(&lm), fDataSource(lm.GetDataSource()), fColRegister(colRegister)
 
  145   std::unordered_set<std::string> allColumns;
 
  147   auto addIfNotInternal = [&allColumns](std::string_view colName) {
 
  149         allColumns.emplace(colName);
 
  152   auto definedColumns = fColRegister.GetNames();
 
  154   std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
 
  156   auto tree = fLoopManager->GetTree();
 
  159         allColumns.emplace(bName);
 
  163      for (
const auto &s : fDataSource->GetColumnNames()) {
 
  164         if (s.rfind(
"R_rdf_sizeof", 0) != 0)
 
  165            allColumns.emplace(s);
 
  170   std::sort(ret.begin(), ret.end());
 
  189   const auto col = fColRegister.ResolveAlias(std::string(column));
 
  193   const bool convertVector2RVec = 
true;
 
  231   const auto columnNames = GetColumnNames();
 
  232   std::set<std::string> definedColumnNamesSet;
 
  233   for (
const auto &
name : GetDefinedColumnNames())
 
  234      definedColumnNamesSet.insert(
name);
 
  237   const std::vector<std::string> metadataProperties = {
"Columns in total", 
"Columns from defines", 
"Event loops run",
 
  239   const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
 
  240                                                    std::to_string(definedColumnNamesSet.size()),
 
  241                                                    std::to_string(GetNRuns()), std::to_string(
GetNSlots())};
 
  247   const auto columnWidthValues =
 
  248      std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), 
static_cast<std::size_t
>(5u));
 
  249   std::stringstream ss;
 
  250   ss << std::left << std::setw(columnWidthProperties) << 
"Property" << std::setw(columnWidthValues) << 
"Value\n" 
  251      << std::setw(columnWidthProperties) << 
"--------" << std::setw(columnWidthValues) << 
"-----\n";
 
  255   for (
auto i = 0u; i < metadataProperties.size(); i++) {
 
  256      ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
 
  257         << std::setw(columnWidthValues) << metadataValues[i] << 
'\n';
 
  263   const auto columnTypes = GetColumnTypeNamesList(columnNames);
 
  265   ss << std::left << std::setw(columnWidthNames) << 
"Column" << std::setw(columnWidthTypes) << 
"Type" 
  267      << std::setw(columnWidthNames) << 
"------" << std::setw(columnWidthTypes) << 
"----" 
  271   const auto nCols = columnNames.size();
 
  272   for (
auto i = 0u; i < nCols; i++) {
 
  273      auto origin = 
"Dataset";
 
  274      if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
 
  276      ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes) << columnTypes[i]
 
  303   const auto columns = fColRegister.BuildDefineNames();
 
  304   for (
const auto &column : columns) {
 
  306         definedColumns.emplace_back(column);
 
  309   return definedColumns;
 
  326   return fColRegister.BuildVariationsDescription();
 
  345   if (fColRegister.IsDefineOrAlias(columnName))
 
  348   if (fLoopManager->GetTree()) {
 
  349      const auto &branchNames = fLoopManager->GetBranchNames();
 
  350      const auto branchNamesEnd = branchNames.end();
 
  351      if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
 
  355   if (fDataSource && fDataSource->HasColumn(columnName))
 
  375   return fLoopManager->GetNSlots();
 
  394   return fLoopManager->GetNRuns();
 
  399   std::vector<std::string> types;
 
  401   for (
auto column : columnList) {
 
  402      types.push_back(GetColumnType(column));
 
  410      std::string error(callerName);
 
  411      error += 
" was called with ImplicitMT enabled, but multi-thread is not supported.";
 
  412      throw std::runtime_error(error);
 
  419   const std::string entryColName = 
"rdfentry_";
 
  420   const std::string entryColType = 
"ULong64_t";
 
  421   auto entryColGen = [](
unsigned int, 
ULong64_t entry) { 
return entry; };
 
  422   using NewColEntry_t = 
RDFDetail::RDefine<
decltype(entryColGen), RDFDetail::ExtraArgsForDefine::SlotAndEntry>;
 
  424   auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
 
  426   fColRegister.AddDefine(std::move(entryColumn));
 
  429   const std::string slotColName = 
"rdfslot_";
 
  430   const std::string slotColType = 
"unsigned int";
 
  431   auto slotColGen = [](
unsigned int slot) { 
return slot; };
 
  432   using NewColSlot_t = 
RDFDetail::RDefine<
decltype(slotColGen), RDFDetail::ExtraArgsForDefine::Slot>;
 
  434   auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), 
ColumnNames_t{},
 
  435                                                    fColRegister, *fLoopManager);
 
  436   fColRegister.AddDefine(std::move(slotColumn));
 
  438   fColRegister.AddAlias(
"tdfentry_", entryColName);
 
  439   fColRegister.AddAlias(
"tdfslot_", slotColName);
 
unsigned long long ULong64_t
The head node of an RDF computation graph.
A binder for user-defined columns, variations and aliases.
An RDFDescription contains useful information about a given RDataFrame computation graph.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
The RLoopManager at the root of this computation graph. Never null.
unsigned int GetNRuns() const
Gets the number of event loops run.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterfaceBase(std::shared_ptr< RDFDetail::RLoopManager > lm)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
A descriptor for the systematic variations known to a given RDataFrame node.
A chain is a collection of files containing TTree objects.
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get the optimal column width for printing a table, given the names and the desired minimal space between columns.
bool IsInternalColumn(std::string_view colName)
Whether the custom column named colName is an "internal" column such as rdfentry_ or rdfslot_.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
std::vector< std::string > ColumnNames_t
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.