Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RCluster.hxx
Go to the documentation of this file.
1/// \file ROOT/RCluster.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2020-03-11
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RCluster
17#define ROOT7_RCluster
18
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <cstdint>
22#include <memory>
23#include <unordered_map>
24#include <unordered_set>
25#include <utility>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30namespace Detail {
31
32
33// clang-format off
34/**
35\class ROnDiskPage
36\ingroup NTuple
37\brief A page as being stored on disk, that is packed and compressed
38
39Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
40uncompressed and unpacked before they can be used by RNTuple upper layers.
41*/
42// clang-format on
44private:
45 /// The memory location of the bytes
46 const void *fAddress = nullptr;
47 /// The compressed and packed size of the page
48 std::size_t fSize = 0;
49
50public:
51 /// On-disk pages within a page source are identified by the column and page number. The key is used for
52 /// associative collections of on-disk pages.
53 struct Key {
54 DescriptorId_t fColumnId; ///< Column component of the key
55 std::uint64_t fPageNo; ///< Page-number component of the key
56 Key(DescriptorId_t columnId, std::uint64_t pageNo) : fColumnId(columnId), fPageNo(pageNo) {}
57 friend bool operator ==(const Key &lhs, const Key &rhs) { // keys are equal iff both components match
58 return lhs.fColumnId == rhs.fColumnId && lhs.fPageNo == rhs.fPageNo;
59 }
60 };
61
62 ROnDiskPage() = default; ///< Null page: fAddress stays nullptr and fSize stays 0
63 ROnDiskPage(void *address, std::size_t size) : fAddress(address), fSize(size) {} ///< Stores the pointer and size as given; no bytes are copied here
64
65 const void *GetAddress() const { return fAddress; } ///< The stored address (nullptr for a null page)
66 std::size_t GetSize() const { return fSize; } ///< The stored size in bytes
67
68 bool IsNull() const { return fAddress == nullptr; } ///< True if no address has been set
69};
70
71} // namespace Detail
72} // namespace Experimental
73} // namespace ROOT
74
75// For hash maps ROnDiskPage::Key --> ROnDiskPage
76namespace std
77{
78 template <>
79 struct hash<ROOT::Experimental::Detail::ROnDiskPage::Key>
80 {
81 // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
 // Combines the hash of the column id with the hash of the page number: the page-number hash is
 // shifted left by one bit, XOR-ed with the column-id hash, and the result is shifted right by one bit.
82 size_t operator()(const ROOT::Experimental::Detail::ROnDiskPage::Key &key) const
83 {
84 return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fColumnId) ^
85 (hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >> 1);
86 }
87 };
88}
89
90
91namespace ROOT {
92namespace Experimental {
93namespace Detail {
94
95// clang-format off
96/**
97\class ROOT::Experimental::Detail::ROnDiskPageMap
98\ingroup NTuple
99\brief A memory region that contains packed and compressed pages
100
101Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
102*/
103// clang-format on
105 friend class RCluster; // gives RCluster access to the private fOnDiskPages table
106
107private:
108 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages; ///< Pages added through Register(), indexed by key
109
110public:
111 ROnDiskPageMap() = default;
112 ROnDiskPageMap(const ROnDiskPageMap &other) = delete; // not copyable
113 ROnDiskPageMap(ROnDiskPageMap &&other) = default;
114 ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete; // not copy-assignable
117
118 /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
119 /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
120 /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
 /// Uses emplace(), so registering a key that is already present leaves the existing entry unchanged.
121 void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
122};
123
124
125// clang-format off
126/**
127\class ROOT::Experimental::Detail::ROnDiskPageMapHeap
128\ingroup NTuple
129\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
130*/
131// clang-format on
133private:
134 /// The memory region containing the on-disk pages.
135 std::unique_ptr<unsigned char []> fMemory;
136public:
 /// Takes ownership of the given buffer by moving it into fMemory.
137 explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
143};
144
145// clang-format off
146/**
147\class ROOT::Experimental::Detail::RCluster
148\ingroup NTuple
149\brief An in-memory subset of the packed and compressed pages of a cluster
150
151Binds together several page maps that represent all the pages of certain columns of a cluster
152*/
153// clang-format on
154class RCluster {
155protected:
156 /// References the cluster identifier in the page source that created the cluster
157 DescriptorId_t fClusterId;
158 /// Multiple page maps can be combined in a single RCluster
159 std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
160 /// Set of the (complete) columns represented by the RCluster
161 std::unordered_set<DescriptorId_t> fAvailColumns;
162 /// Lookup table for the on-disk pages
163 std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
164
165public:
166 explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {} ///< Creates an empty cluster with the given identifier
167 RCluster(const RCluster &other) = delete;
168 RCluster(RCluster &&other) = default;
169 RCluster &operator =(const RCluster &other) = delete;
170 RCluster &operator =(RCluster &&other) = default;
171 ~RCluster() = default;
172
173 /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
174 /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
175 /// Their content is supposed to be the same.
176 /// Page maps cannot be physically merged because they have potentially used different allocation mechanisms
177 /// (e.g. mmap vs. malloc).
178 void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
179 /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
180 /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
181 /// (left to the implementation).
182 void Adopt(RCluster &&other);
183 /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
184 /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
185 /// typically the last step of RPageSource::LoadCluster().
186 void SetColumnAvailable(DescriptorId_t columnId);
187 const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const; ///< Looks up the on-disk page for key; defined out of line in RCluster.cxx
188
189 DescriptorId_t GetId() const { return fClusterId; } ///< The cluster identifier passed to the constructor
190 const std::unordered_set<DescriptorId_t> &GetAvailColumns() const { return fAvailColumns; } ///< Read-only view of the available-columns set
191 bool ContainsColumn(DescriptorId_t columnId) const { return fAvailColumns.count(columnId) > 0; } ///< True if columnId is in the available-columns set
192 size_t GetNOnDiskPages() const { return fOnDiskPages.size(); } ///< Number of entries in the cluster's page lookup table
193};
194
195} // namespace Detail
196} // namespace Experimental
197} // namespace ROOT
198
199#endif
TRObject operator()(const T1 &t1) const
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:154
RCluster(const RCluster &other)=delete
RCluster & operator=(const RCluster &other)=delete
bool ContainsColumn(DescriptorId_t columnId) const
Definition RCluster.hxx:191
std::unordered_set< DescriptorId_t > fAvailColumns
Set of the (complete) columns represented by the RCluster.
Definition RCluster.hxx:161
RCluster(RCluster &&other)=default
DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition RCluster.hxx:157
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition RCluster.cxx:45
RCluster(DescriptorId_t clusterId)
Definition RCluster.hxx:166
void SetColumnAvailable(DescriptorId_t columnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition RCluster.cxx:70
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition RCluster.hxx:159
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition RCluster.hxx:163
const std::unordered_set< DescriptorId_t > & GetAvailColumns() const
Definition RCluster.hxx:190
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition RCluster.cxx:37
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition RCluster.hxx:132
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition RCluster.hxx:135
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition RCluster.hxx:137
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
A memory region that contains packed and compressed pages.
Definition RCluster.hxx:104
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition RCluster.hxx:108
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition RCluster.hxx:121
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROnDiskPageMap(ROnDiskPageMap &&other)=default
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
A page as being stored on disk, that is packed and compressed.
Definition RCluster.hxx:43
ROnDiskPage(void *address, std::size_t size)
Definition RCluster.hxx:63
const void * fAddress
The memory location of the bytes.
Definition RCluster.hxx:46
std::size_t fSize
The compressed and packed size of the page.
Definition RCluster.hxx:48
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:53
friend bool operator==(const Key &lhs, const Key &rhs)
Definition RCluster.hxx:57
Key(DescriptorId_t columnId, std::uint64_t pageNo)
Definition RCluster.hxx:56