Logo ROOT   master
Reference Guide
RNTupleZip.hxx
Go to the documentation of this file.
1 /// \file ROOT/RNTupleZip.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-11-21
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RNTupleZip
17 #define ROOT7_RNTupleZip
18 
19 #include <RZip.h>
20 #include <TError.h>
21 
22 #include <algorithm>
23 #include <array>
24 #include <cstring>
25 #include <functional>
26 #include <memory>
27 #include <utility>
28 
29 namespace ROOT {
30 namespace Experimental {
31 namespace Detail {
32 
33 // clang-format off
34 /**
35 \class ROOT::Experimental::Detail::RNTupleCompressor
36 \ingroup NTuple
37 \brief Helper class to compress data blocks in the ROOT compression frame format
38 */
39 // clang-format on
41 private:
42  using Buffer_t = std::array<unsigned char, kMAXZIPBUF>;
43  std::unique_ptr<Buffer_t> fZipBuffer;
44 
45 public:
46  /// Data might be overwritten, if a zipped block in the middle of a large input data stream
47  /// turns out to be uncompressible
48  using Writer_t = std::function<void(const void *buffer, size_t nbytes, size_t offset)>;
49  static constexpr size_t kMaxSingleBlock = kMAXZIPBUF;
50 
51  RNTupleCompressor() : fZipBuffer(std::unique_ptr<Buffer_t>(new Buffer_t())) {}
52  RNTupleCompressor(const RNTupleCompressor &other) = delete;
53  RNTupleCompressor &operator =(const RNTupleCompressor &other) = delete;
54 
55  /// Returns the size of the compressed data. Data is compressed in 16MB blocks and written
56  /// piecewise using the provided writer
57  size_t operator() (const void *from, size_t nbytes, int compression, Writer_t fnWriter) {
58  R__ASSERT(from != nullptr);
59 
60  auto cxLevel = compression % 100;
61  if (cxLevel == 0) {
62  fnWriter(from, nbytes, 0);
63  return nbytes;
64  }
65 
66  auto cxAlgorithm = static_cast<ROOT::RCompressionSetting::EAlgorithm::EValues>(compression / 100);
67  unsigned int nZipBlocks = 1 + (nbytes - 1) / kMAXZIPBUF;
68  char *source = const_cast<char *>(static_cast<const char *>(from));
69  int szTarget = kMAXZIPBUF;
70  char *target = reinterpret_cast<char *>(fZipBuffer->data());
71  int szOutBlock = 0;
72  int szRemaining = nbytes;
73  size_t szZipData = 0;
74  for (unsigned int i = 0; i < nZipBlocks; ++i) {
75  int szSource = std::min(static_cast<int>(kMAXZIPBUF), szRemaining);
76  R__zipMultipleAlgorithm(cxLevel, &szSource, source, &szTarget, target, &szOutBlock, cxAlgorithm);
77  R__ASSERT(szOutBlock >= 0);
78  if ((szOutBlock == 0) || (szOutBlock >= szSource)) {
79  // Uncompressible block, we have to store the entire input data stream uncompressed
80  fnWriter(from, nbytes, 0);
81  return nbytes;
82  }
83 
84  fnWriter(target, szOutBlock, szZipData);
85  szZipData += szOutBlock;
86  source += szSource;
87  szRemaining -= szSource;
88  }
89  R__ASSERT(szRemaining == 0);
90  R__ASSERT(szZipData < nbytes);
91  return szZipData;
92  }
93 
94  /// Returns the size of the compressed data block. The data is written into the zip buffer.
95  /// This works only for small input buffer up to 16MB
96  size_t operator() (const void *from, size_t nbytes, int compression) {
97  R__ASSERT(from != nullptr);
98  R__ASSERT(nbytes <= kMAXZIPBUF);
99 
100  auto cxLevel = compression % 100;
101  if (cxLevel == 0) {
102  memcpy(fZipBuffer->data(), from, nbytes);
103  return nbytes;
104  }
105 
106  auto cxAlgorithm = static_cast<ROOT::RCompressionSetting::EAlgorithm::EValues>(compression / 100);
107  int szSource = nbytes;
108  char *source = const_cast<char *>(static_cast<const char *>(from));
109  int szTarget = nbytes;
110  char *target = reinterpret_cast<char *>(fZipBuffer->data());
111  int szOut = 0;
112  R__zipMultipleAlgorithm(cxLevel, &szSource, source, &szTarget, target, &szOut, cxAlgorithm);
113  R__ASSERT(szOut >= 0);
114  if ((szOut > 0) && (static_cast<unsigned int>(szOut) < nbytes))
115  return szOut;
116 
117  memcpy(fZipBuffer->data(), from, nbytes);
118  return nbytes;
119  }
120 
121  const void *GetZipBuffer() { return fZipBuffer->data(); }
122 };
123 
124 
125 // clang-format off
126 /**
127 \class ROOT::Experimental::Detail::RNTupleDecompressor
128 \ingroup NTuple
129 \brief Helper class to uncompress data blocks in the ROOT compression frame format
130 */
131 // clang-format on
133 private:
134  using Buffer_t = std::array<unsigned char, kMAXZIPBUF>;
135  std::unique_ptr<Buffer_t> fUnzipBuffer;
136 
137 public:
139  RNTupleDecompressor(const RNTupleDecompressor &other) = delete;
140  RNTupleDecompressor &operator =(const RNTupleDecompressor &other) = delete;
141 
142  /**
143  * The nbytes parameter provides the size ls of the from buffer. The dataLen gives the size of the uncompressed data.
144  * The block is uncompressed iff nbytes == dataLen.
145  */
146  void operator() (const void *from, size_t nbytes, size_t dataLen, void *to) {
147  if (dataLen == nbytes) {
148  memcpy(to, from, nbytes);
149  return;
150  }
151  R__ASSERT(dataLen > nbytes);
152 
153  unsigned char *source = const_cast<unsigned char *>(static_cast<const unsigned char *>(from));
154  unsigned char *target = static_cast<unsigned char *>(to);
155  int szRemaining = dataLen;
156  do {
157  int szSource;
158  int szTarget;
159  int retval = R__unzip_header(&szSource, source, &szTarget);
160  R__ASSERT(retval == 0);
161  R__ASSERT(szSource > 0);
162  R__ASSERT(szTarget > szSource);
163  R__ASSERT(static_cast<unsigned int>(szSource) <= nbytes);
164  R__ASSERT(static_cast<unsigned int>(szTarget) <= dataLen);
165 
166  int unzipBytes = 0;
167  R__unzip(&szSource, source, &szTarget, target, &unzipBytes);
168  R__ASSERT(unzipBytes == szTarget);
169 
170  target += szTarget;
171  source += szSource;
172  szRemaining -= unzipBytes;
173  } while (szRemaining > 0);
174  R__ASSERT(szRemaining == 0);
175  }
176 
177  /**
178  * In-place decompression via unzip buffer
179  */
180  void operator() (void *fromto, size_t nbytes, size_t dataLen) {
181  R__ASSERT(dataLen <= kMAXZIPBUF);
182  operator()(fromto, nbytes, dataLen, fUnzipBuffer->data());
183  memcpy(fromto, fUnzipBuffer->data(), dataLen);
184  }
185 };
186 
187 } // namespace Detail
188 } // namespace Experimental
189 } // namespace ROOT
190 
191 #endif
RNTupleDecompressor & operator=(const RNTupleDecompressor &other)=delete
RNTupleCompressor & operator=(const RNTupleCompressor &other)=delete
Returns the available number of logical cores.
Definition: RNumpyDS.hxx:30
#define R__ASSERT(e)
Definition: TError.h:96
std::array< unsigned char, kMAXZIPBUF > Buffer_t
Definition: RNTupleZip.hxx:42
STL namespace.
std::array< unsigned char, kMAXZIPBUF > Buffer_t
Definition: RNTupleZip.hxx:134
size_t operator()(const void *from, size_t nbytes, int compression, Writer_t fnWriter)
Returns the size of the compressed data.
Definition: RNTupleZip.hxx:57
Helper class to uncompress data blocks in the ROOT compression frame format
Definition: RNTupleZip.hxx:132
std::function< void(const void *buffer, size_t nbytes, size_t offset)> Writer_t
Data might be overwritten, if a zipped block in the middle of a large input data stream turns out to ...
Definition: RNTupleZip.hxx:48
int R__unzip_header(Int_t *nin, UChar_t *bufin, Int_t *lout)
EValues
Note: this is only temporarily a struct and will become a enum class hence the name.
Definition: Compression.h:83
Helper class to compress data blocks in the ROOT compression frame format
Definition: RNTupleZip.hxx:40
void R__unzip(Int_t *nin, UChar_t *bufin, Int_t *lout, char *bufout, Int_t *nout)
std::unique_ptr< Buffer_t > fZipBuffer
Definition: RNTupleZip.hxx:43
void operator()(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size ls of the from buffer.
Definition: RNTupleZip.hxx:146