30ROOT::RLogChannel &RNTupleExporterLog()
32 static RLogChannel sLog(
"ROOT.RNTupleExporter");
36struct RColumnExportInfo {
37 const ROOT::RColumnDescriptor *fColDesc;
38 const ROOT::RFieldDescriptor *fFieldDesc;
39 std::string fQualifiedName;
41 RColumnExportInfo(
const ROOT::RNTupleDescriptor &desc,
const ROOT::RColumnDescriptor &colDesc,
42 const ROOT::RFieldDescriptor &fieldDesc)
44 fFieldDesc(&fieldDesc),
47 fQualifiedName(desc.GetQualifiedFieldName(fieldDesc.GetId()) +
'-' + std::to_string(colDesc.GetIndex()))
52struct RAddColumnsResult {
55 RAddColumnsResult &
operator+=(
const RAddColumnsResult &other)
57 fNColsTotal += other.fNColsTotal;
65 bool filterHasType = filter.fSet.find(item) != filter.fSet.end();
70RAddColumnsResult
AddColumnsFromField(std::vector<RColumnExportInfo> &vec,
const ROOT::RNTupleDescriptor &desc,
71 const ROOT::RFieldDescriptor &fieldDesc,
77 RAddColumnsResult res{};
80 if (subfieldDesc.IsProjectedField())
85 bool typeIsFiltered = ItemIsFilteredOut(options.fColumnTypeFilter, colDesc.GetType());
87 vec.emplace_back(desc, colDesc, subfieldDesc);
96int CountPages(
const ROOT::RNTupleDescriptor &desc, std::span<const RColumnExportInfo> columns)
102 for (
const auto &colInfo : columns) {
103 const auto &pages = clusterDesc.GetPageRange(colInfo.fColDesc->GetPhysicalId());
104 nPages += pages.GetPageInfos().size();
128 std::vector<RColumnExportInfo> columnInfos;
133 columnSet.reserve(columnInfos.size());
134 for (
const auto &colInfo : columnInfos) {
135 columnSet.emplace(colInfo.fColDesc->GetPhysicalId());
138 const auto nPages = CountPages(desc.GetRef(), columnInfos);
145 int pagesExported = 0;
146 int prevIntPercent = 0;
147 std::vector<char> unzipBuf;
151 for (
const auto &colInfo : columnInfos) {
152 auto columnId = colInfo.fColDesc->GetPhysicalId();
153 const auto &pages = clusterDesc.GetPageRange(columnId);
154 const auto &colRange = clusterDesc.GetColumnRange(columnId);
156 colElement->SetBitsOnStorage(colInfo.fColDesc->GetBitsOnStorage());
158 std::uint64_t pageIdx = 0;
161 <<
"exporting column \"" << colInfo.fQualifiedName <<
"\" (" << pages.GetPageInfos().size() <<
" pages)";
164 assert(!colRange.IsSuppressed() || pages.GetPageInfos().empty());
166 for (
const auto &pageInfo : pages.GetPageInfos()) {
171 std::ostringstream ss{options.
fOutputPath, std::ios_base::ate};
172 assert(colRange.GetCompressionSettings());
173 ss <<
"/cluster_" << clusterDesc.GetId() <<
"_" << colInfo.fQualifiedName <<
"_page_" << pageIdx
174 <<
"_elems_" << pageInfo.GetNElements() <<
"_comp_" << *colRange.GetCompressionSettings() <<
".page";
175 const auto outFileName = ss.str();
176 std::ofstream outFile{outFileName, std::ios_base::binary};
179 R__FAIL(std::string(
"output path ") + options.
fOutputPath +
" does not exist or is not writable!"));
183 const auto *pageBuf =
static_cast<const char *
>(onDiskPage->
GetAddress());
185 const auto nbytesPacked = colElement->GetPackedSize(pageInfo.GetNElements());
186 const auto nbytesData = pageInfo.GetLocator().GetNBytesOnStorage();
187 if (unzipBuf.size() < nbytesPacked)
188 unzipBuf.resize(nbytesPacked);
190 outFile.write(unzipBuf.data(), nbytesPacked);
192 const bool includeChecksum =
194 const std::size_t maybeChecksumSize = includeChecksum * 8;
195 const auto nbytesData = pageInfo.GetLocator().GetNBytesOnStorage() + maybeChecksumSize;
196 outFile.write(pageBuf, nbytesData);
201 ++pageIdx, ++pagesExported;
204 int intPercent =
static_cast<int>(100.f * pagesExported / res.
fExportedFileNames.size());
205 if (intPercent != prevIntPercent) {
206 fprintf(stderr,
"\rExport progress: %02d%%", intPercent);
207 if (intPercent == 100)
208 fprintf(stderr,
"\n");
209 prevIntPercent = intPercent;
218 std::ostringstream ss;
221 ss << addColRes.fNColsTotal <<
" columns)";
223 auto nColsFilteredOut = addColRes.fNColsTotal - columnInfos.size();
224 ss << nColsFilteredOut <<
"/" << addColRes.fNColsTotal <<
" columns filtered out)";
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
#define R__LOG_DEBUG(DEBUGLEVEL,...)
static void AddColumnsFromField(std::vector< RColumnMergeInfo > &columns, const ROOT::RNTupleDescriptor &srcDesc, RNTupleMergeData &mergeData, const ROOT::RFieldDescriptor &srcFieldDesc, const ROOT::RFieldDescriptor &dstFieldDesc, const std::string &prefix="")
std::string & operator+=(std::string &left, const TString &right)
@ kBlacklist
Don't export items contained in the filter's set.
static RPagesResult ExportPages(ROOT::Internal::RPageSource &source, const RPagesOptions &options={})
Given a page source, writes all its pages to individual files (1 per page).
Manages a set of clusters containing compressed and packed pages.
An in-memory subset of the packed and compressed pages of a cluster.
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
A page as being stored on disk, that is packed and compressed.
const void * GetAddress() const
Abstract interface to read data from an ntuple.
void Attach(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode=ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kForReading)
Open the physical storage container and deserialize header and footer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor. Multiple threads can take the lock concurrently....
Base class for all ROOT issued exceptions.
ROOT::DescriptorId_t GetId() const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RColumnDescriptorIterable GetColumnIterable() const
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
const RFieldDescriptor & GetFieldZero() const
constexpr DescriptorId_t kInvalidDescriptorId
std::unordered_set< T > fSet
RFilter< ENTupleColumnType > fColumnTypeFilter
Optional filter that determines which columns are included or excluded from being exported.
@ kDecompress
If enabled, uncompress (but don't unpack) the page (mutually exclusive with kIncludeChecksums)
@ kShowProgressBar
If enabled, the exporter will report the current progress on the stderr.
std::vector< std::string > fExportedFileNames
On-disk pages within a page source are identified by the column and page number.