24#include <unordered_map> 
   30   std::uint64_t fFirstEntry = 0;
 
   31   std::uint32_t fNPages = 0;
 
   32   std::uint32_t fNEntries = 0;
 
   33   std::uint32_t fBytesOnStorage = 0;
 
   34   std::uint32_t fBytesInMemory = 0;
 
   37      return fFirstEntry == other.fFirstEntry;
 
   40   bool operator <(
const ClusterInfo &other)
 const {
 
   41      return fFirstEntry < other.fFirstEntry;
 
   49   std::uint64_t fLocalOrder = 0;
 
   50   std::uint64_t fNElements = 0;
 
   51   std::uint64_t fNPages = 0;
 
   52   std::uint64_t fBytesOnStorage = 0;
 
   53   std::uint32_t fElementSize = 0;
 
   55   std::string fFieldName;
 
   56   std::string fFieldDescription;
 
   58   bool operator <(
const ColumnInfo &other)
 const {
 
   59      if (fFieldName == other.fFieldName)
 
   60         return fLocalOrder < other.fLocalOrder;
 
   61      return fFieldName < other.fFieldName;
 
   71   return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) + 
"." + fieldDesc.GetFieldName();
 
   85   std::vector<ColumnInfo> columns;
 
   86   std::vector<ClusterInfo> clusters;
 
   87   std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
 
   90      info.fFirstEntry = cluster.second.GetFirstEntryIndex();
 
   91      info.fNEntries = cluster.second.GetNEntries();
 
   92      cluster2Idx[cluster.first] = clusters.size();
 
   93      clusters.emplace_back(info);
 
   96   std::uint64_t bytesOnStorage = 0;
 
   97   std::uint64_t bytesInMemory = 0;
 
   98   std::uint64_t nPages = 0;
 
  106      info.fPhysicalColumnId = column.second.GetPhysicalId();
 
  107      info.fLogicalColumnId = column.second.GetLogicalId();
 
  108      info.fFieldId = column.second.GetFieldId();
 
  109      info.fLocalOrder = column.second.GetIndex();
 
  110      info.fElementSize = elementSize;
 
  111      info.fType = column.second.GetModel().GetType();
 
  114         auto columnRange = cluster.second.GetColumnRange(column.second.GetPhysicalId());
 
  115         info.fNElements += columnRange.fNElements;
 
  116         if (compression == -1) {
 
  117            compression = columnRange.fCompressionSettings;
 
  119         const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId());
 
  120         auto idx = cluster2Idx[cluster.first];
 
  121         for (
const auto &page : pageRange.fPageInfos) {
 
  122            bytesOnStorage += page.fLocator.fBytesOnStorage;
 
  123            bytesInMemory += page.fNElements * elementSize;
 
  124            clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
 
  125            clusters[idx].fBytesInMemory += page.fNElements * elementSize;
 
  126            ++clusters[idx].fNPages;
 
  127            info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
 
  132      columns.emplace_back(info);
 
  136   output << 
"============================================================" << std::endl;
 
  138   output << 
"Compression: " << compression << std::endl;
 
  139   output << 
"------------------------------------------------------------" << std::endl;
 
  144   output << 
"  # Pages:          " << nPages << std::endl;
 
  146   output << 
"  Size on storage:  " << bytesOnStorage << 
" B" << std::endl;
 
  147   output << 
"  Compression rate: " << std::fixed << std::setprecision(2)
 
  148                                    << float(bytesInMemory) / float(bytesOnStorage) << std::endl;
 
  149   output << 
"  Header size:      " << headerSize << 
" B" << std::endl;
 
  150   output << 
"  Footer size:      " << footerSize << 
" B" << std::endl;
 
  151   output << 
"  Meta-data / data: " << std::fixed << std::setprecision(3)
 
  152                                    << float(headerSize + footerSize) / float(bytesOnStorage) << std::endl;
 
  153   output << 
"------------------------------------------------------------" << std::endl;
 
  154   output << 
"CLUSTER DETAILS" << std::endl;
 
  155   output << 
"------------------------------------------------------------" << std::endl;
 
  157   std::sort(clusters.begin(), clusters.end());
 
  158   for (
unsigned int i = 0; i < clusters.size(); ++i) {
 
  159      output << 
"  # " << std::setw(5) << i
 
  160             << 
"   Entry range:     [" << clusters[i].fFirstEntry << 
".." 
  161             << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 << 
"]  --  " << clusters[i].fNEntries << std::endl;
 
  163             << 
"   # Pages:         " << clusters[i].fNPages << std::endl;
 
  165             << 
"   Size on storage: " << clusters[i].fBytesOnStorage << 
" B" << std::endl;
 
  167             << 
"   Compression:     " << std::fixed << std::setprecision(2)
 
  168             << float(clusters[i].fBytesInMemory) / float(
float(clusters[i].fBytesOnStorage)) << std::endl;
 
  171   output << 
"------------------------------------------------------------" << std::endl;
 
  172   output << 
"COLUMN DETAILS" << std::endl;
 
  173   output << 
"------------------------------------------------------------" << std::endl;
 
  174   for (
auto &col : columns) {
 
  175      col.fFieldName = GetFieldName(col.fFieldId, *
this).substr(1);
 
  176      col.fFieldDescription = GetFieldDescription(col.fFieldId, *
this);
 
  178   std::sort(columns.begin(), columns.end());
 
  179   for (
const auto &col : columns) {
 
  180      auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
 
  181      auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
 
  182      std::string nameAndType = std::string(
"  ") + col.fFieldName + 
" [#" + std::to_string(col.fLocalOrder) + 
"]" 
  184      std::string 
id = std::string(
"{id:") + std::to_string(col.fLogicalColumnId) + 
"}";
 
  185      if (col.fLogicalColumnId != col.fPhysicalColumnId)
 
  186         id += 
" --alias--> " + std::to_string(col.fPhysicalColumnId);
 
  187      output << nameAndType << std::setw(60 - nameAndType.length()) << 
id << std::endl;
 
  188      if (!col.fFieldDescription.empty())
 
  189         output << 
"    Description:         " << col.fFieldDescription << std::endl;
 
  190      output << 
"    # Elements:          " << col.fNElements << std::endl;
 
  191      output << 
"    # Pages:             " << col.fNPages << std::endl;
 
  192      output << 
"    Avg elements / page: " << avgElementsPerPage << std::endl;
 
  193      output << 
"    Avg page size:       " << avgPageSize << 
" B" << std::endl;
 
  194      output << 
"    Size on storage:     " << col.fBytesOnStorage << 
" B" << std::endl;
 
  195      output << 
"    Compression:         " << std::fixed << std::setprecision(2)
 
  196             << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << std::endl;
 
  197      output << 
"............................................................" << std::endl;
 
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Bool_t operator==(const TDatime &d1, const TDatime &d2)
static std::string GetTypeName(EColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
The available trivial, native content types of a column.
std::string GetFieldName() const
std::string GetFieldDescription() const
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::size_t GetNLogicalColumns() const
std::string GetName() const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
std::size_t GetNFields() const
std::uint64_t GetOnDiskFooterSize() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::size_t GetNPhysicalColumns() const
void PrintInfo(std::ostream &output) const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId