Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2020-03-11
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RCluster
17#define ROOT_RCluster
18
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <cstdint>
22#include <memory>
23#include <unordered_map>
24#include <unordered_set>
25#include <utility>
26#include <vector>
27
28namespace ROOT {
29namespace Internal {
30
31// clang-format off
32/**
33\class ROOT::Internal::ROnDiskPage
34\ingroup NTuple
35\brief A page as being stored on disk, that is packed and compressed
36
37Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
38uncompressed and unpacked before they can be used by RNTuple upper layers.
39*/
40// clang-format on
42private:
43 /// The memory location of the bytes
44 const void *fAddress = nullptr;
45 /// The compressed and packed size of the page. This includes both payload and checksum (if present)
46 std::uint32_t fSize = 0;
47
48public:
49 /// On-disk pages within a page source are identified by the column and page number. The key is used for
50 /// associative collections of on-disk pages.
51 struct Key {
53 std::uint64_t fPageNo;
55 friend bool operator ==(const Key &lhs, const Key &rhs) {
56 return lhs.fPhysicalColumnId == rhs.fPhysicalColumnId && lhs.fPageNo == rhs.fPageNo;
57 }
58 };
59
60 ROnDiskPage() = default;
61 ROnDiskPage(void *address, std::uint32_t size) : fAddress(address), fSize(size) {}
62
63 const void *GetAddress() const { return fAddress; }
64 std::uint32_t GetSize() const { return fSize; }
65
66 bool IsNull() const { return fAddress == nullptr; }
67}; // class ROnDiskPage
68
69} // namespace Internal
70} // namespace ROOT
71
72// For hash maps ROnDiskPage::Key --> ROnDiskPage
73namespace std
74{
75template <>
76struct hash<ROOT::Internal::ROnDiskPage::Key> {
77 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
78 size_t operator()(const ROOT::Internal::ROnDiskPage::Key &key) const
79 {
80 return (
81 (std::hash<ROOT::DescriptorId_t>()(key.fPhysicalColumnId) ^ (hash<ROOT::NTupleSize_t>()(key.fPageNo) << 1)) >>
82 1);
83 }
84};
85}
86
87namespace ROOT {
88namespace Internal {
89
90// clang-format off
91/**
92\class ROOT::Internal::ROnDiskPageMap
93\ingroup NTuple
94\brief A memory region that contains packed and compressed pages
95
96Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
97*/
98// clang-format on
100 friend class RCluster;
101
102private:
103 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
104
105public:
106 ROnDiskPageMap() = default;
112
113 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
114 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
115 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
116 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
117}; // class ROnDiskPageMap
118
119// clang-format off
120/**
121\class ROOT::Internal::ROnDiskPageMapHeap
122\ingroup NTuple
123\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
124*/
125// clang-format on
127private:
128 /// The memory region containing the on-disk pages.
129 std::unique_ptr<unsigned char []> fMemory;
130public:
131 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
137}; // class ROnDiskPageMapHeap
138
139// clang-format off
140/**
141\class ROOT::Internal::RCluster
142\ingroup NTuple
143\brief An in-memory subset of the packed and compressed pages of a cluster
144
145Binds together several page maps that represent all the pages of certain columns of a cluster
146*/
147// clang-format on
148class RCluster {
149public:
150 using ColumnSet_t = std::unordered_set<ROOT::DescriptorId_t>;
151 /// The identifiers that specify the content of a (partial) cluster
156
157protected:
158 /// References the cluster identifier in the page source that created the cluster
160 /// Multiple page maps can be combined in a single RCluster
161 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
162 /// Set of the (complete) columns represented by the RCluster
164 /// Lookup table for the on-disk pages
165 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
166
167public:
169 RCluster(const RCluster &other) = delete;
170 RCluster(RCluster &&other) = default;
173 ~RCluster() = default;
174
175 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
176 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
177 /// Their content is supposed to be the same.
178 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
179 /// (e.g. mmap vs. malloc).
180 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
181 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
182 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
183 /// (left to the implementation).
184 void Adopt(RCluster &&other);
185 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
186 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
187 /// typically the last step of RPageSource::LoadCluster().
189 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
190
194 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
195}; // class RCluster
196
197} // namespace Internal
198} // namespace ROOT
199
200#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TRObject operator()(const T1 &t1) const
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:148
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition RCluster.hxx:161
ColumnSet_t fAvailPhysicalColumns
Set of the (complete) columns represented by the RCluster.
Definition RCluster.hxx:163
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:150
const ColumnSet_t & GetAvailPhysicalColumns() const
Definition RCluster.hxx:192
bool ContainsColumn(ROOT::DescriptorId_t colId) const
Definition RCluster.hxx:193
ROOT::DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition RCluster.hxx:159
RCluster(ROOT::DescriptorId_t clusterId)
Definition RCluster.hxx:168
void SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition RCluster.cxx:62
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition RCluster.cxx:31
RCluster(const RCluster &other)=delete
RCluster(RCluster &&other)=default
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition RCluster.cxx:39
size_t GetNOnDiskPages() const
Definition RCluster.hxx:194
RCluster & operator=(const RCluster &other)=delete
ROOT::DescriptorId_t GetId() const
Definition RCluster.hxx:191
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition RCluster.hxx:165
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition RCluster.hxx:126
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition RCluster.hxx:129
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition RCluster.hxx:131
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:99
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition RCluster.hxx:103
ROnDiskPageMap(ROnDiskPageMap &&other)=default
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition RCluster.hxx:116
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
A page as being stored on disk, that is packed and compressed.
Definition RCluster.hxx:41
std::uint32_t fSize
The compressed and packed size of the page. This includes both payload and checksum (if present)
Definition RCluster.hxx:46
ROnDiskPage(void *address, std::uint32_t size)
Definition RCluster.hxx:61
const void * fAddress
The memory location of the bytes.
Definition RCluster.hxx:44
std::uint32_t GetSize() const
Definition RCluster.hxx:64
const void * GetAddress() const
Definition RCluster.hxx:63
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The identifiers that specify the content of a (partial) cluster.
Definition RCluster.hxx:152
ROOT::DescriptorId_t fClusterId
Definition RCluster.hxx:153
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:51
ROOT::DescriptorId_t fPhysicalColumnId
Definition RCluster.hxx:52
friend bool operator==(const Key &lhs, const Key &rhs)
Definition RCluster.hxx:55
Key(ROOT::DescriptorId_t columnId, std::uint64_t pageNo)
Definition RCluster.hxx:54