Logo ROOT  
Reference Guide
RCluster.hxx
Go to the documentation of this file.
1 /// \file ROOT/RCluster.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2020-03-11
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RCluster
17 #define ROOT7_RCluster
18 
19 #include <ROOT/RNTupleUtil.hxx>
20 
21 #include <cstdint>
22 #include <memory>
23 #include <unordered_map>
24 #include <unordered_set>
25 #include <utility>
26 #include <vector>
27 
28 namespace ROOT {
29 namespace Experimental {
30 namespace Detail {
31 
32 
33 // clang-format off
34 /**
35 \class ROnDiskPage
36 \ingroup NTuple
37 \brief A page as being stored on disk, that is packed and compressed
38 
39 Used by the cluster pool to cache pages from the physical storage. Such pages generally need to be
40 uncompressed and unpacked before they can be used by RNTuple upper layers.
41 */
42 // clang-format on
43 class ROnDiskPage {
44 private:
45  /// The memory location of the bytes
46  const void *fAddress = nullptr;
47  /// The compressed and packed size of the page
48  std::size_t fSize = 0;
49 
50 public:
51  /// On-disk pages within a page source are identified by the column and page number. The key is used for
52  /// associative collections of on-disk pages.
53  struct Key {
55  std::uint64_t fPageNo;
56  Key(DescriptorId_t columnId, std::uint64_t pageNo) : fColumnId(columnId), fPageNo(pageNo) {}
57  friend bool operator ==(const Key &lhs, const Key &rhs) {
58  return lhs.fColumnId == rhs.fColumnId && lhs.fPageNo == rhs.fPageNo;
59  }
60  };
61 
62  ROnDiskPage() = default;
63  ROnDiskPage(void *address, std::size_t size) : fAddress(address), fSize(size) {}
64 
65  const void *GetAddress() const { return fAddress; }
66  std::size_t GetSize() const { return fSize; }
67 
68  bool IsNull() const { return fAddress == nullptr; }
69 };
70 
71 } // namespace Detail
72 } // namespace Experimental
73 } // namespace ROOT
74 
75 // For hash maps ROnDiskPage::Key --> ROnDiskPage
76 namespace std
77 {
78  template <>
79  struct hash<ROOT::Experimental::Detail::ROnDiskPage::Key>
80  {
81  // TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
83  {
84  return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fColumnId) ^
85  (hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >> 1);
86  }
87  };
88 }
89 
90 
91 namespace ROOT {
92 namespace Experimental {
93 namespace Detail {
94 
95 // clang-format off
96 /**
97 \class ROOT::Experimental::Detail::ROnDiskPageMap
98 \ingroup NTuple
99 \brief A memory region that contains packed and compressed pages
100 
101 Derived classes implement how the on-disk pages are stored in memory, e.g. mmap'd or in a special area.
102 */
103 // clang-format on
105  friend class RCluster;
106 
107 private:
108  std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
109 
110 public:
111  ROnDiskPageMap() = default;
112  ROnDiskPageMap(const ROnDiskPageMap &other) = delete;
113  ROnDiskPageMap(ROnDiskPageMap &&other) = default;
114  ROnDiskPageMap &operator =(const ROnDiskPageMap &other) = delete;
116  virtual ~ROnDiskPageMap();
117 
118  /// Inserts information about a page stored in fMemory. Therefore, the address referenced by onDiskPage
119  /// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
120  /// it is expected that _all_ the pages of that column in that cluster are part of the page map.
121  void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
122 };
123 
124 
125 // clang-format off
126 /**
127 \class ROOT::Experimental::Detail::ROnDiskPageMapHeap
128 \ingroup NTuple
129 \brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
130 */
131 // clang-format on
133 private:
134  /// The memory region containing the on-disk pages.
135  std::unique_ptr<unsigned char []> fMemory;
136 public:
137  explicit ROnDiskPageMapHeap(std::unique_ptr<unsigned char []> memory) : fMemory(std::move(memory)) {}
138  ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other) = delete;
143 };
144 
145 // clang-format off
146 /**
147 \class ROOT::Experimental::Detail::RCluster
148 \ingroup NTuple
149 \brief An in-memory subset of the packed and compressed pages of a cluster
150 
151 Binds together several page maps that represent all the pages of certain columns of a cluster
152 */
153 // clang-format on
154 class RCluster {
155 protected:
156  /// References the cluster identifier in the page source that created the cluster
158  /// Multiple page maps can be combined in a single RCluster
159  std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
160  /// Set of the (complete) columns represented by the RCluster
161  std::unordered_set<DescriptorId_t> fAvailColumns;
162  /// Lookup table for the on-disk pages
163  std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;
164 
165 public:
166  explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {}
167  RCluster(const RCluster &other) = delete;
168  RCluster(RCluster &&other) = default;
169  RCluster &operator =(const RCluster &other) = delete;
170  RCluster &operator =(RCluster &&other) = default;
171  ~RCluster() = default;
172 
173  /// Move the given page map into this cluster; for on-disk pages that are present in both the cluster at hand and
174  /// pageMap, GetOnDiskPage() may return the page from either of the memory regions (left to the implementation).
175  /// Their content is supposed to be the same.
176  /// Page maps cannot be physically merged them because they have potentially used different allocation mechanisms
177  /// (e.g. mmap vs. malloc).
178  void Adopt(std::unique_ptr<ROnDiskPageMap> pageMap);
179  /// Move the contents of other into this cluster; for on-disk pages that are present in both the cluster at hand and
180  /// the "other" cluster, GetOnDiskPage() may return the page from either of the memory regions
181  /// (left to the implementation).
182  void Adopt(RCluster &&other);
183  /// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
184  /// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
185  /// typically the last step of RPageSouce::LoadCluster().
186  void SetColumnAvailable(DescriptorId_t columnId);
187  const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;
188 
189  DescriptorId_t GetId() const { return fClusterId; }
190  const std::unordered_set<DescriptorId_t> &GetAvailColumns() const { return fAvailColumns; }
191  bool ContainsColumn(DescriptorId_t columnId) const { return fAvailColumns.count(columnId) > 0; }
192  size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
193 };
194 
195 } // namespace Detail
196 } // namespace Experimental
197 } // namespace ROOT
198 
199 #endif
ROOT::Experimental::Detail::ROnDiskPage::Key::fPageNo
std::uint64_t fPageNo
Definition: RCluster.hxx:55
ROOT::Experimental::Detail::ROnDiskPage::fSize
std::size_t fSize
The compressed and packed size of the page.
Definition: RCluster.hxx:48
ROOT::Experimental::Detail::RCluster::operator=
RCluster & operator=(const RCluster &other)=delete
ROOT::Experimental::Detail::ROnDiskPage::Key::Key
Key(DescriptorId_t columnId, std::uint64_t pageNo)
Definition: RCluster.hxx:56
ROOT::Experimental::Detail::ROnDiskPageMap::ROnDiskPageMap
ROnDiskPageMap()=default
ROOT::Experimental::Detail::RCluster::fPageMaps
std::vector< std::unique_ptr< ROnDiskPageMap > > fPageMaps
Multiple page maps can be combined in a single RCluster.
Definition: RCluster.hxx:159
ROOT::Experimental::Detail::RCluster::fOnDiskPages
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Lookup table for the on-disk pages.
Definition: RCluster.hxx:163
ROOT::Experimental::Detail::RCluster::GetOnDiskPage
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition: RCluster.cxx:37
ROOT::Experimental::Detail::ROnDiskPage::GetSize
std::size_t GetSize() const
Definition: RCluster.hxx:66
ROOT::Experimental::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition: RNTupleUtil.hxx:91
ROOT::Experimental::Detail::ROnDiskPageMapHeap
An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
Definition: RCluster.hxx:132
ROOT::Experimental::Detail::ROnDiskPage::Key::fColumnId
DescriptorId_t fColumnId
Definition: RCluster.hxx:54
ROOT::Experimental::Detail::ROnDiskPage::Key::operator==
friend bool operator==(const Key &lhs, const Key &rhs)
Definition: RCluster.hxx:57
operator()
TRObject operator()(const T1 &t1) const
Definition: TRFunctionImport__oprtr.h:14
ROOT::Experimental::Detail::ROnDiskPageMap::operator=
ROnDiskPageMap & operator=(const ROnDiskPageMap &other)=delete
ROOT::Experimental::Detail::ROnDiskPageMap::ROnDiskPageMap
ROnDiskPageMap(ROnDiskPageMap &&other)=default
ROOT::Experimental::Detail::ROnDiskPage::fAddress
const void * fAddress
The memory location of the bytes.
Definition: RCluster.hxx:46
ROOT::Experimental::Detail::ROnDiskPage::Key
On-disk pages within a page source are identified by the column and page number.
Definition: RCluster.hxx:53
ROOT::Experimental::Detail::ROnDiskPage::IsNull
bool IsNull() const
Definition: RCluster.hxx:68
ROOT::Experimental::Detail::RCluster::SetColumnAvailable
void SetColumnAvailable(DescriptorId_t columnId)
Marks the column as complete; must be done for all columns, even empty ones without associated pages,...
Definition: RCluster.cxx:70
ROOT::Experimental::Detail::ROnDiskPageMap::~ROnDiskPageMap
virtual ~ROnDiskPageMap()
ROOT::Experimental::Detail::ROnDiskPageMap::Register
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage)
Inserts information about a page stored in fMemory.
Definition: RCluster.hxx:121
ROOT::Experimental::Detail::RCluster::RCluster
RCluster(DescriptorId_t clusterId)
Definition: RCluster.hxx:166
ROOT::Experimental::Detail::RCluster::fAvailColumns
std::unordered_set< DescriptorId_t > fAvailColumns
Set of the (complete) columns represented by the RCluster.
Definition: RCluster.hxx:161
ROOT::Experimental::Detail::ROnDiskPage::ROnDiskPage
ROnDiskPage()=default
ROOT::Experimental::Detail::RCluster::Adopt
void Adopt(std::unique_ptr< ROnDiskPageMap > pageMap)
Move the given page map into this cluster; for on-disk pages that are present in both the cluster at ...
Definition: RCluster.cxx:45
ROOT::Experimental::Detail::RCluster::fClusterId
DescriptorId_t fClusterId
References the cluster identifier in the page source that created the cluster.
Definition: RCluster.hxx:157
ROOT::Experimental::Detail::ROnDiskPage
A page as being stored on disk, that is packed and compressed.
Definition: RCluster.hxx:43
ROOT::Experimental::Detail::RCluster::RCluster
RCluster(RCluster &&other)=default
ROOT::Experimental::Detail::ROnDiskPageMapHeap::ROnDiskPageMapHeap
ROnDiskPageMapHeap(const ROnDiskPageMapHeap &other)=delete
ROOT::Experimental::Detail::ROnDiskPage::GetAddress
const void * GetAddress() const
Definition: RCluster.hxx:65
ROOT::Experimental::Detail::ROnDiskPageMapHeap::operator=
ROnDiskPageMapHeap & operator=(const ROnDiskPageMapHeap &other)=delete
RNTupleUtil.hxx
ROOT::Experimental::Detail::ROnDiskPageMapHeap::ROnDiskPageMapHeap
ROnDiskPageMapHeap(ROnDiskPageMapHeap &&other)=default
ROOT::Experimental::Detail::ROnDiskPageMap::ROnDiskPageMap
ROnDiskPageMap(const ROnDiskPageMap &other)=delete
ROOT::Experimental::Detail::ROnDiskPage::ROnDiskPage
ROnDiskPage(void *address, std::size_t size)
Definition: RCluster.hxx:63
ROOT::Experimental::Detail::ROnDiskPageMapHeap::ROnDiskPageMapHeap
ROnDiskPageMapHeap(std::unique_ptr< unsigned char[]> memory)
Definition: RCluster.hxx:137
ROOT::Experimental::Detail::RCluster::RCluster
RCluster(const RCluster &other)=delete
ROOT::Experimental::Detail::ROnDiskPageMap::fOnDiskPages
std::unordered_map< ROnDiskPage::Key, ROnDiskPage > fOnDiskPages
Definition: RCluster.hxx:108
ROOT::Experimental::Detail::ROnDiskPageMapHeap::fMemory
std::unique_ptr< unsigned char[]> fMemory
The memory region containing the on-disk pages.
Definition: RCluster.hxx:135
ROOT::Experimental::Detail::RCluster::~RCluster
~RCluster()=default
ROOT::Experimental::Detail::ROnDiskPageMapHeap::~ROnDiskPageMapHeap
~ROnDiskPageMapHeap()
ROOT::Experimental::Detail::RCluster::GetAvailColumns
const std::unordered_set< DescriptorId_t > & GetAvailColumns() const
Definition: RCluster.hxx:190
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: EExecutionPolicy.hxx:4
ROOT::Experimental::Detail::RCluster::ContainsColumn
bool ContainsColumn(DescriptorId_t columnId) const
Definition: RCluster.hxx:191
ROOT::Experimental::Detail::RCluster
An in-memory subset of the packed and compressed pages of a cluster.
Definition: RCluster.hxx:154
ROOT::Experimental::Detail::RCluster::GetId
DescriptorId_t GetId() const
Definition: RCluster.hxx:189
ROOT::Experimental::Detail::ROnDiskPageMap
A memory region that contains packed and compressed pages.
Definition: RCluster.hxx:104
ROOT::Experimental::Detail::RCluster::GetNOnDiskPages
size_t GetNOnDiskPages() const
Definition: RCluster.hxx:192