1// Author: Enric Tejedor CERN 10/2017
4 * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
11#ifndef ROOT_RCSVTDS
12#define ROOT_RCSVTDS
14#include "ROOT/RDataFrame.hxx"
15#include "ROOT/RDataSource.hxx"
17#include <cstdint>
18#include <deque>
19#include <list>
20#include <map>
21#include <memory>
22#include <vector>
24#include <TRegexp.h>
26namespace ROOT {
28namespace Internal {
29class RRawFile;
32namespace RDF {
// Data source for CSV input: a `final` class deriving from ROOT::RDF::RDataSource.
// Only declarations appear here; the member functions are implemented out of line.
// NOTE(review): no access specifiers and no closing brace of the class are visible in this listing --
// confirm member visibility against the original header.
34class RCsvDS final : public ROOT::RDF::RDataSource {
37 // Possible values are d, b, l, s. A single char suffices only because the supported column types are limited to double, bool, Long64_t and string.
38 using ColType_t = char;
   // Lookup table from a column type tag to a string; presumably the type name reported by GetTypeName() --
   // confirm in the implementation.
39 static const std::map<ColType_t, std::string> fgColTypeMap;
41 // Regular expressions for type inference
   // NOTE(review): no regular-expression members are declared in this listing although <TRegexp.h> is included;
   // their declarations are presumably missing here -- confirm.
   // Presumably the position in the file at which the data rows start -- TODO confirm.
44 std::uint64_t fDataPos = 0;
   // In-class default is false, whereas the constructor's readHeaders argument defaults to true; presumably the
   // constructor overwrites this value -- confirm.
45 bool fReadHeaders = false;
   // Number of processing slots; presumably set through SetNSlots() -- confirm.
46 unsigned int fNSlots = 0U;
   // Owning handle to the input file. RRawFile is only forward-declared in this header.
47 std::unique_ptr<ROOT::Internal::RRawFile> fCsvFile;
   // Column delimiter. Being const without an in-class initialiser, it has to be set in the constructor's
   // member-initialiser list.
48 const char fDelimiter;
51 ULong64_t fProcessedLines = 0ULL; // marks the progress of the consumption of the csv lines
   // Column names; presumably what GetColumnNames() returns -- confirm.
52 std::vector<std::string> fHeaders;
   // Type tag per column, presumably keyed by column name -- confirm.
53 std::map<std::string, ColType_t> fColTypes;
   // Column type tags as a list; presumably in column order -- confirm.
54 std::list<ColType_t> fColTypesList;
55 std::vector<std::vector<void *>> fColAddresses; // fColAddresses[column][slot]
   // NOTE(review): Record_t is not declared in this listing; presumably inherited from RDataSource -- confirm.
56 std::vector<Record_t> fRecords; // fRecords[entry][column]
57 std::vector<std::vector<double>> fDoubleEvtValues; // one per column per slot
58 std::vector<std::vector<Long64_t>> fLong64EvtValues; // one per column per slot
59 std::vector<std::vector<std::string>> fStringEvtValues; // one per column per slot
60 // This must be a deque to avoid the std::vector<bool> specialisation: with that specialisation
61 // a pointer to an individual boolean cannot be taken.
62 std::vector<std::deque<bool>> fBoolEvtValues; // one per column per slot
   // Parsing and type-inference helpers.
64 void FillHeaders(const std::string &);
65 void FillRecord(const std::string &, Record_t &);
66 void GenerateHeaders(size_t);
67 std::vector<void *> GetColumnReadersImpl(std::string_view, const std::type_info &);
68 void InferColTypes(std::vector<std::string> &);
69 void InferType(const std::string &, unsigned int);
70 std::vector<std::string> ParseColumns(const std::string &);
71 size_t ParseValue(const std::string &, std::vector<std::string> &, size_t);
   // Returns the type tag (see ColType_t) for the column called colName.
72 ColType_t GetType(std::string_view colName) const;
75 std::string AsString();
   // Constructor defaults: readHeaders = true, delimiter = ',', linesChunkSize = -1.
   // NOTE(review): the meaning of linesChunkSize (and of the value -1) is not visible in this header --
   // confirm in the implementation.
78 RCsvDS(std::string_view fileName, bool readHeaders = true, char delimiter = ',', Long64_t linesChunkSize = -1LL);
   // NOTE(review): none of the member functions below is marked `override` in this listing -- confirm which
   // of them override RDataSource virtuals.
79 void Finalise();
80 void FreeRecords();
81 ~RCsvDS();
82 const std::vector<std::string> &GetColumnNames() const;
83 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges();
84 std::string GetTypeName(std::string_view colName) const;
85 bool HasColumn(std::string_view colName) const;
86 bool SetEntry(unsigned int slot, ULong64_t entry);
87 void SetNSlots(unsigned int nSlots);
88 std::string GetLabel();
92/// \brief Factory method to create a CSV RDataFrame.
93/// \param[in] fileName Path of the CSV file.
94/// \param[in] readHeaders `true` if the CSV file contains headers as first row, `false` otherwise
95/// (default `true`).
96/// \param[in] delimiter Delimiter character (default ',').
/// \param[in] linesChunkSize Defaults to -1. NOTE(review): its meaning is not visible in this header; presumably
/// the number of lines read per chunk, with -1 disabling chunking -- confirm in the implementation.
/// \return An RDataFrame, returned by value; presumably backed by an RCsvDS -- confirm in the implementation.
97RDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders = true, char delimiter = ',',
98 Long64_t linesChunkSize = -1LL);
100} // ns RDF
102} // ns ROOT
