Logo ROOT  
Reference Guide
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2020-03-11
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RCluster
17#define ROOT7_RCluster
18
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <cstdint>
22#include <memory>
23#include <unordered_map>
24#include <unordered_set>
25#include <utility>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30namespace Detail {
31
32
33// clang-format off
34/**
35\class ROnDiskPage
36\ingroup NTuple
37\brief A page as it is stored on disk, that is, packed and compressed
38
39Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
40uncompressed and unpacked before they can be used by RNTuple upper layers.
41*/
42// clang-format on
44private:
45 /// The memory location of the bytes
46 const void *fAddress = nullptr;
47 /// The compressed and packed size of the page
48 std::uint32_t fSize = 0;
49
50public:
51 /// On-disk pages within a page source are identified by the column and page number. The key is used for
52 /// associative collections of on-disk pages.
53 struct Key {
55 std::uint64_t fPageNo;
56 Key(DescriptorId_t columnId, std::uint64_t pageNo) : fColumnId(columnId), fPageNo(pageNo) {}
57 friend bool operator ==(const Key &lhs, const Key &rhs) {
58 return lhs.fColumnId == rhs.fColumnId && lhs.fPageNo == rhs.fPageNo;
59 }
60 };
61
62 ROnDiskPage() = default;
63 ROnDiskPage(void *address, std::uint32_t size) : fAddress(address), fSize(size) {}
64
65 const void *GetAddress() const { return fAddress; }
66 std::uint32_t GetSize() const { return fSize; }
67
68 bool IsNull() const { return fAddress == nullptr; }
69};
70
71} // namespace Detail
72} // namespace Experimental
73} // namespace ROOT
74
75// For hash maps ROnDiskPage::Key --> ROnDiskPage
76namespace std
77{
78 template <>
79 struct hash<ROOT::Experimental::Detail::ROnDiskPage::Key>
80 {
81 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
83 {
84 return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fColumnId) ^
85 (hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >> 1);
86 }
87 };
88}
89
90
91namespace ROOT {
92namespace Experimental {
93namespace Detail {
94
95// clang-format off
96/**
97\class ROOT::Experimental::Detail::ROnDiskPageMap
98\ingroup NTuple
99\brief A memory region that contains packed and compressed pages
100
101Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
102*/
103// clang-format on
105 friend class RCluster;
106
107private:
108 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
109
110public:
111 ROnDiskPageMap() = default;
112 ROnDiskPageMap(const ROnDiskPageMap &other) = delete;
113 ROnDiskPageMap(ROnDiskPageMap &&other) = default;
114 ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete;
117
118 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
119 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
120 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
121 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
122};
123
124
125// clang-format off
126/**
127\class ROOT::Experimental::Detail::ROnDiskPageMapHeap
128\ingroup NTuple
129\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
130*/
131// clang-format on
133private:
134 /// The memory region containing the on-disk pages.
135 std::unique_ptr<unsigned char []> fMemory;
136public:
137 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
143};
144
145// clang-format off
146/**
147\class ROOT::Experimental::Detail::RCluster
148\ingroup NTuple
149\brief An in-memory subset of the packed and compressed pages of a cluster
150
151Binds together several page maps that represent all the pages of certain columns of a cluster
152*/
153// clang-format on
154class RCluster {
155public:
156 using ColumnSet_t = std::unordered_set<DescriptorId_t>;
157 /// The identifiers that specify the content of a (partial) cluster
158 struct RKey {
161 };
162
163protected:
164 /// References the cluster identifier in the page source that created the cluster
166 /// Multiple page maps can be combined in a single RCluster
167 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
168 /// Set of the (complete) columns represented by the RCluster
170 /// Lookup table for the on-disk pages
171 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
172
173public:
174 explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {}
175 RCluster(const RCluster &other) = delete;
176 RCluster(RCluster &&other) = default;
177 RCluster &operator =(const RCluster &other) = delete;
178 RCluster &operator =(RCluster &&other) = default;
179 ~RCluster() = default;
180
181 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
182 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
183 /// Their content is supposed to be the same.
184 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
185 /// (e.g. mmap vs. malloc).
186 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
187 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
188 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
189 /// (left to the implementation).
190 void Adopt(RCluster &&other);
191 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
192 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
193 /// typically the last step of RPageSource::LoadCluster().
194 void SetColumnAvailable(DescriptorId_t columnId);
195 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
196
197 DescriptorId_t GetId() const { return fClusterId; }
198 const ColumnSet_t &GetAvailColumns() const { return fAvailColumns; }
199 bool ContainsColumn(DescriptorId_t columnId) const { return fAvailColumns.count(columnId) > 0; }
200 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
201};
202
203} // namespace Detail
204} // namespace Experimental
205} // namespace ROOT
206
207#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TRObject operator()(const T1 &t1) const
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
RCluster(const RCluster &other)=delete
RCluster & operator=(const RCluster &other)=delete
bool ContainsColumn(DescriptorId_t columnId) const
Definition: RCluster.hxx:199
const ColumnSet_t & GetAvailColumns() const
Definition: RCluster.hxx:198
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition: RCluster.hxx:156
RCluster(RCluster &&other)=default
DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition: RCluster.hxx:165
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition: RCluster.cxx:45
RCluster(DescriptorId_t clusterId)
Definition: RCluster.hxx:174
void SetColumnAvailable(DescriptorId_t columnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition: RCluster.cxx:70
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition: RCluster.hxx:167
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition: RCluster.hxx:171
DescriptorId_t GetId() const
Definition: RCluster.hxx:197
ColumnSet_t fAvailColumns
Set of the (complete) columns represented by the RCluster.
Definition: RCluster.hxx:169
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition: RCluster.cxx:37
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition: RCluster.hxx:132
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition: RCluster.hxx:135
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition: RCluster.hxx:137
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
A memory region that contains packed and compressed pages.
Definition: RCluster.hxx:104
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition: RCluster.hxx:108
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition: RCluster.hxx:121
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROnDiskPageMap(ROnDiskPageMap &&other)=default
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
A page as it is stored on disk, that is, packed and compressed.
Definition: RCluster.hxx:43
ROnDiskPage(void *address, std::uint32_t size)
Definition: RCluster.hxx:63
std::uint32_t fSize
The compressed and packed size of the page.
Definition: RCluster.hxx:48
const void * fAddress
The memory location of the bytes.
Definition: RCluster.hxx:46
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The identifiers that specify the content of a (partial) cluster.
Definition: RCluster.hxx:158
On-disk pages within a page source are identified by the column and page number.
Definition: RCluster.hxx:53
friend bool operator==(const Key &lhs, const Key &rhs)
Definition: RCluster.hxx:57
Key(DescriptorId_t columnId, std::uint64_t pageNo)
Definition: RCluster.hxx:56