Logo ROOT  
Reference Guide
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2020-03-11
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RCluster
17#define ROOT7_RCluster
18
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <cstdint>
22#include <memory>
23#include <unordered_map>
24#include <unordered_set>
25#include <utility>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30namespace Detail {
31
32
33// clang-format off
34/**
35\class ROnDiskPage
36\ingroup NTuple
37\brief A page as it is stored on disk, that is, packed and compressed
38
39Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
40uncompressed and unpacked before they can be used by RNTuple upper layers.
41*/
42// clang-format on
44private:
45 /// The memory location of the bytes
46 const void *fAddress = nullptr;
47 /// The compressed and packed size of the page
48 std::uint32_t fSize = 0;
49
50public:
51 /// On-disk pages within a page source are identified by the column and page number. The key is used for
52 /// associative collections of on-disk pages.
53 struct Key {
55 std::uint64_t fPageNo;
56 Key(DescriptorId_t columnId, std::uint64_t pageNo) : fPhysicalColumnId(columnId), fPageNo(pageNo) {}
57 friend bool operator ==(const Key &lhs, const Key &rhs) {
58 return lhs.fPhysicalColumnId == rhs.fPhysicalColumnId && lhs.fPageNo == rhs.fPageNo;
59 }
60 };
61
62 ROnDiskPage() = default;
63 ROnDiskPage(void *address, std::uint32_t size) : fAddress(address), fSize(size) {}
64
65 const void *GetAddress() const { return fAddress; }
66 std::uint32_t GetSize() const { return fSize; }
67
68 bool IsNull() const { return fAddress == nullptr; }
69};
70
71} // namespace Detail
72} // namespace Experimental
73} // namespace ROOT
74
75// For hash maps ROnDiskPage::Key --> ROnDiskPage
76namespace std
77{
78 template <>
79 struct hash<ROOT::Experimental::Detail::ROnDiskPage::Key>
80 {
81 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
83 {
84 return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fPhysicalColumnId) ^
85 (hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >>
86 1);
87 }
88 };
89}
90
91
92namespace ROOT {
93namespace Experimental {
94namespace Detail {
95
96// clang-format off
97/**
98\class ROOT::Experimental::Detail::ROnDiskPageMap
99\ingroup NTuple
100\brief A memory region that contains packed and compressed pages
101
102Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
103*/
104// clang-format on
106 friend class RCluster;
107
108private:
109 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
110
111public:
112 ROnDiskPageMap() = default;
113 ROnDiskPageMap(const ROnDiskPageMap &other) = delete;
114 ROnDiskPageMap(ROnDiskPageMap &&other) = default;
115 ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete;
118
119 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
120 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
121 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
122 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
123};
124
125
126// clang-format off
127/**
128\class ROOT::Experimental::Detail::ROnDiskPageMapHeap
129\ingroup NTuple
130\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
131*/
132// clang-format on
134private:
135 /// The memory region containing the on-disk pages.
136 std::unique_ptr<unsigned char []> fMemory;
137public:
138 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
144};
145
146// clang-format off
147/**
148\class ROOT::Experimental::Detail::RCluster
149\ingroup NTuple
150\brief An in-memory subset of the packed and compressed pages of a cluster
151
152Binds together several page maps that represent all the pages of certain columns of a cluster
153*/
154// clang-format on
155class RCluster {
156public:
157 using ColumnSet_t = std::unordered_set<DescriptorId_t>;
158 /// The identifiers that specify the content of a (partial) cluster
159 struct RKey {
162 };
163
164protected:
165 /// References the cluster identifier in the page source that created the cluster
167 /// Multiple page maps can be combined in a single RCluster
168 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
169 /// Set of the (complete) columns represented by the RCluster
171 /// Lookup table for the on-disk pages
172 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
173
174public:
175 explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {}
176 RCluster(const RCluster &other) = delete;
177 RCluster(RCluster &&other) = default;
178 RCluster &operator =(const RCluster &other) = delete;
179 RCluster &operator =(RCluster &&other) = default;
180 ~RCluster() = default;
181
182 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
183 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
184 /// Their content is supposed to be the same.
185 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
186 /// (e.g. mmap vs. malloc).
187 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
188 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
189 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
190 /// (left to the implementation).
191 void Adopt(RCluster &&other);
192 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
193 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
194 /// typically the last step of RPageSource::LoadCluster().
195 void SetColumnAvailable(DescriptorId_t physicalColumnId);
196 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
197
198 DescriptorId_t GetId() const { return fClusterId; }
200 bool ContainsColumn(DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
201 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
202};
203
204} // namespace Detail
205} // namespace Experimental
206} // namespace ROOT
207
208#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TRObject operator()(const T1 &t1) const
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:155
RCluster(const RCluster &other)=delete
RCluster & operator=(const RCluster &other)=delete
std::unordered_set< DescriptorId_t > ColumnSet_t
Definition: RCluster.hxx:157
RCluster(RCluster &&other)=default
DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition: RCluster.hxx:166
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition: RCluster.cxx:45
bool ContainsColumn(DescriptorId_t colId) const
Definition: RCluster.hxx:200
RCluster(DescriptorId_t clusterId)
Definition: RCluster.hxx:175
void SetColumnAvailable(DescriptorId_t physicalColumnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition: RCluster.cxx:69
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition: RCluster.hxx:168
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition: RCluster.hxx:172
ColumnSet_t fAvailPhysicalColumns
Set of the (complete) columns represented by the RCluster.
Definition: RCluster.hxx:170
DescriptorId_t GetId() const
Definition: RCluster.hxx:198
const ColumnSet_t & GetAvailPhysicalColumns() const
Definition: RCluster.hxx:199
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition: RCluster.cxx:37
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition: RCluster.hxx:133
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition: RCluster.hxx:136
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition: RCluster.hxx:138
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
A memory region that contains packed and compressed pages.
Definition: RCluster.hxx:105
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition: RCluster.hxx:109
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition: RCluster.hxx:122
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROnDiskPageMap(ROnDiskPageMap &&other)=default
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
A page as it is stored on disk, that is, packed and compressed.
Definition: RCluster.hxx:43
ROnDiskPage(void *address, std::uint32_t size)
Definition: RCluster.hxx:63
std::uint32_t fSize
The compressed and packed size of the page.
Definition: RCluster.hxx:48
const void * fAddress
The memory location of the bytes.
Definition: RCluster.hxx:46
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
The identifiers that specify the content of a (partial) cluster.
Definition: RCluster.hxx:159
On-disk pages within a page source are identified by the column and page number.
Definition: RCluster.hxx:53
friend bool operator==(const Key &lhs, const Key &rhs)
Definition: RCluster.hxx:57
Key(DescriptorId_t columnId, std::uint64_t pageNo)
Definition: RCluster.hxx:56