113#include "haddCommandLineOptionsHelp.h"
122int main(
int argc,
char **argv )
124 if ( argc < 3 ||
"-h" == std::string(argv[1]) ||
"--help" == std::string(argv[1]) ) {
125 fprintf(stderr, kCommandLineOptionsHelp);
126 return (argc == 2 && (
"-h" == std::string(argv[1]) ||
"--help" == std::string(argv[1]))) ? 0 : 1;
139 Int_t maxopenedfiles = 0;
140 Int_t verbosity = 99;
144 auto nProcesses = s.
fCpus;
149 for(
int a = 1;
a < argc; ++
a ) {
150 if ( strcmp(argv[
a],
"-T") == 0 ) {
153 }
else if ( strcmp(argv[
a],
"-a") == 0 ) {
156 }
else if ( strcmp(argv[
a],
"-f") == 0 ) {
159 }
else if ( strcmp(argv[
a],
"-k") == 0 ) {
162 }
else if ( strcmp(argv[
a],
"-O") == 0 ) {
165 }
else if (strcmp(argv[
a],
"-dbg") == 0) {
169 }
else if (strcmp(argv[
a],
"-d") == 0) {
170 if (
a + 1 != argc && argv[
a + 1][0] !=
'-') {
172 std::cerr <<
"Error: could not access the directory specified: " << argv[
a + 1]
173 <<
". We will use the system's temporal directory.\n";
175 workingDir = argv[
a + 1];
180 std::cout <<
"-d: no directory specified. We will use the system's temporal directory.\n";
183 }
else if (strcmp(argv[
a],
"-j") == 0) {
185 if (
a + 1 != argc && argv[
a + 1][0] !=
'-') {
188 for (
char *
c = argv[
a + 1]; *
c !=
'\0'; ++
c) {
191 std::cerr <<
"Error: could not parse the number of processes to run in parallel passed after -j: "
192 << argv[
a + 1] <<
". We will use the system maximum.\n";
198 request = strtol(argv[
a + 1], 0, 10);
199 if (request < kMaxLong && request >= 0) {
200 nProcesses = (
Int_t)request;
203 std::cout <<
"Parallelizing with " << nProcesses <<
" processes.\n";
205 std::cerr <<
"Error: could not parse the number of processes to use passed after -j: " << argv[
a + 1]
206 <<
". We will use the default value (number of logical cores).\n";
212 }
else if ( strcmp(argv[
a],
"-cachesize=") == 0 ) {
214 static const size_t arglen = strlen(
"-cachesize=");
217 std::cerr <<
"Error: could not parse the cache size passed after -cachesize: "
218 << argv[
a + 1] <<
". We will use the default value.\n";
221 const char *munit =
nullptr;
223 std::cerr <<
"Error: the cache size passed after -cachesize is too large: "
224 << argv[
a + 1] <<
" is greater than " <<
m << munit
225 <<
". We will use the default value.\n";
227 cacheSize =
"cachesize=";
231 }
else if ( strcmp(argv[
a],
"-cachesize") == 0 ) {
233 std::cerr <<
"Error: no cache size number was provided after -cachesize.\n";
238 std::cerr <<
"Error: could not parse the cache size passed after -cachesize: "
239 << argv[
a + 1] <<
". We will use the default value.\n";
242 const char *munit =
nullptr;
244 std::cerr <<
"Error: the cache size passed after -cachesize is too large: "
245 << argv[
a + 1] <<
" is greater than " <<
m << munit
246 <<
". We will use the default value.\n";
250 cacheSize =
"cachesize=";
257 }
else if (!strcmp(argv[
a],
"-experimental-io-features")) {
259 std::cerr <<
"Error: no IO feature was specified after -experimental-io-features; ignoring\n";
261 std::stringstream ss;
265 while (std::getline(ss, item,
',')) {
266 if (!features.
Set(item)) {
267 std::cerr <<
"Ignoring unknown feature request: " << item << std::endl;
272 }
else if ( strcmp(argv[
a],
"-n") == 0 ) {
274 std::cerr <<
"Error: no maximum number of opened was provided after -n.\n";
276 Long_t request = strtol(argv[
a+1], 0, 10);
277 if (request < kMaxLong && request >= 0) {
278 maxopenedfiles = (
Int_t)request;
282 std::cerr <<
"Error: could not parse the max number of opened file passed after -n: " << argv[
a+1] <<
". We will use the system maximum.\n";
286 }
else if ( strcmp(argv[
a],
"-v") == 0 ) {
287 if (
a+1 == argc || argv[
a+1][0] ==
'-') {
294 for (
char *
c = argv[
a+1]; *
c !=
'\0'; ++
c) {
297 hasFollowupNumber =
kFALSE;
301 if (hasFollowupNumber) {
302 Long_t request = strtol(argv[
a+1], 0, 10);
303 if (request < kMaxLong && request >= 0) {
304 verbosity = (
Int_t)request;
309 std::cerr <<
"Error: could not parse the verbosity level passed after -v: " << argv[
a+1] <<
". We will use the default value (99).\n";
314 }
else if ( argv[
a][0] ==
'-' ) {
316 if (force && argv[
a][1] ==
'f') {
318 std::cerr <<
"Error: Using option " << argv[
a] <<
" more than once is not supported.\n";
322 const char *prefix =
"";
323 if (argv[
a][1] ==
'f' && argv[
a][2] ==
'k') {
326 keepCompressionAsIs =
kTRUE;
329 if (argv[
a][1] ==
'f' && argv[
a][2] ==
'f') {
332 useFirstInputCompression =
kTRUE;
333 if (argv[
a][3] !=
'\0') {
334 std::cerr <<
"Error: option -ff should not have any suffix: " << argv[
a] <<
" (suffix has been ignored)\n";
338 for (
int alg = 0; !useFirstInputCompression && alg <= 5; ++alg) {
339 for(
int j=0; j<=9; ++j ) {
340 const int comp = (alg*100)+j;
341 snprintf(ft,7,
"-f%s%d",prefix,comp);
342 if (!strcmp(argv[
a],ft)) {
352 std::cerr <<
"Error: option " << argv[
a] <<
" is not a supported option.\n";
355 }
else if (!outputPlace) {
362 const char *targetname = 0;
364 targetname = argv[outputPlace];
366 targetname = argv[ffirst-1];
370 std::cout <<
"hadd Target file: " << targetname << std::endl;
376 if (maxopenedfiles > 0) {
382 std::vector<std::string> allSubfiles;
383 for (
int a = ffirst;
a < argc; ++
a) {
384 if (
a == outputPlace)
386 if (argv[
a] && argv[
a][0] ==
'@') {
387 std::ifstream indirect_file(argv[
a] + 1);
388 if (!indirect_file.is_open()) {
389 std::cerr <<
"hadd could not open indirect file " << (argv[
a] + 1) << std::endl;
394 while (indirect_file) {
395 if( std::getline(indirect_file,
line) &&
line.length() ) {
397 std::cerr <<
"hadd could not validate the file name \"" <<
line <<
"\" within indirect file "
398 << (argv[
a] + 1) << std::endl;
402 allSubfiles.emplace_back(
line);
407 const std::string
line = argv[
a];
409 std::cerr <<
"hadd could not validate argument \"" <<
line <<
"\" as input file " << std::endl;
413 allSubfiles.emplace_back(
line);
416 if (allSubfiles.empty()) {
417 std::cerr <<
"hadd could not find any valid input file " << std::endl;
422 if (useFirstInputCompression || keepCompressionAsIs) {
425 if (firstInput && !firstInput->
IsZombie())
437 if (keepCompressionAsIs && !reoptimize)
438 std::cout <<
"hadd compression setting for meta data: " << newcomp <<
'\n';
440 std::cout <<
"hadd compression setting for all output: " << newcomp <<
'\n';
443 if (!fileMerger.
OutputFile(targetname,
"UPDATE", newcomp)) {
444 std::cerr <<
"hadd error opening target file for update :" << argv[ffirst-1] <<
"." << std::endl;
447 }
else if (!fileMerger.
OutputFile(targetname, force, newcomp)) {
448 std::cerr <<
"hadd error opening target file (does " << argv[ffirst-1] <<
" exist?)." << std::endl;
449 if (!force) std::cerr <<
"Pass \"-f\" argument to force re-creation of output file." << std::endl;
453 auto step = (allSubfiles.size() + nProcesses - 1) / nProcesses;
454 if (multiproc && step < 3) {
457 nProcesses = (allSubfiles.size() + step - 1) / step;
458 std::cout <<
"Each process should handle at least 3 files for efficiency.";
459 std::cout <<
" Setting the number of processes to: " << nProcesses << std::endl;
464 std::vector<std::string> partialFiles;
472 auto partialTail = uuid.AsString();
473 for (
auto i = 0; (i * step) < allSubfiles.size(); i++) {
474 std::stringstream buffer;
475 buffer << workingDir <<
"/partial" << i <<
"_" << partialTail <<
".root";
476 partialFiles.emplace_back(buffer.str());
483 merger.SetFastMethod(
kFALSE);
485 if (!keepCompressionAsIs && merger.HasCompressionChange()) {
487 std::cout <<
"hadd Sources and Target have different compression settings\n";
488 std::cout <<
"hadd merging will be slower" << std::endl;
491 merger.SetNotrees(noTrees);
492 merger.SetMergeOptions(
TString(merger.GetMergeOptions()) +
" " + cacheSize);
493 merger.SetIOFeatures(features);
498 status = merger.Merge();
502 auto sequentialMerge = [&](
TFileMerger &merger,
int start,
int nFiles) {
503 for (
auto i = start; i < (start + nFiles) && i < static_cast<int>(allSubfiles.size()); i++) {
504 if (!merger.
AddFile(allSubfiles[i].c_str())) {
506 std::cerr <<
"hadd skipping file with error: " << allSubfiles[i] << std::endl;
508 std::cerr <<
"hadd exiting due to error in " << allSubfiles[i] << std::endl;
516 auto parallelMerge = [&](
int start) {
520 if (maxopenedfiles > 0) {
523 if (!mergerP.
OutputFile(partialFiles[start / step].c_str(), newcomp)) {
524 std::cerr <<
"hadd error opening target partial file" << std::endl;
527 return sequentialMerge(mergerP, start, step);
530 auto reductionFunc = [&]() {
531 for (
const auto &pf : partialFiles) {
532 fileMerger.
AddFile(pf.c_str());
542 auto res =
p.Map(parallelMerge,
ROOT::TSeqI(0, allSubfiles.size(), step));
543 status = std::accumulate(res.begin(), res.end(), 0U) == partialFiles.size();
545 status = reductionFunc();
547 std::cout <<
"hadd failed at the parallel stage" << std::endl;
550 for (
const auto &pf : partialFiles) {
555 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
558 status = sequentialMerge(fileMerger, 0, allSubfiles.size());
562 if (verbosity == 1) {
564 <<
") input (partial) files into " << targetname <<
".\n";
568 if (verbosity == 1) {
569 std::cout <<
"hadd failure during the merge of " << allSubfiles.size() <<
" ("
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
winID h TVirtualViewer3D TVirtualGLPainter p
R__EXTERN TSystem * gSystem
TIOFeatures provides the end-user with the ability to change the IO behavior of data written via a TTree.
bool Set(EIOFeatures bits)
Set a specific IO feature.
This class provides a simple interface to execute the same task multiple times in parallel,...
A pseudo container class which is a generator of indices.
virtual Int_t GetEntries() const
This class provides file copy and merging services.
virtual Bool_t OutputFile(const char *url, Bool_t force)
Open merger output file.
virtual Bool_t AddFile(TFile *source, Bool_t own, Bool_t cpProgress)
Add the TFile to this file merger and give ownership of the TFile to this object (unless kFALSE is returned).
void SetMsgPrefix(const char *prefix)
Set the prefix to be used when printing informational message.
void SetPrintLevel(Int_t level)
@ kAll
Merge all types of objects (default)
@ kIncremental
Merge the input file with the content of the output file (if already existing).
void SetMergeOptions(const TString &options)
void SetMaxOpenedFiles(Int_t newmax)
Set a limit to the number of files that TFileMerger will open simultaneously.
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-like logical structure, possibly including subdirectory hierarchies.
Int_t GetCompressionSettings() const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
R__ALWAYS_INLINE Bool_t IsZombie() const
TString & Append(const char *cs)
virtual int GetSysInfo(SysInfo_t *info) const
Returns static system info, like OS type, CPU type, number of CPUs, RAM size, etc. into the SysInfo_t structure.
virtual int Load(const char *module, const char *entry="", Bool_t system=kFALSE)
Load a shared library.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
virtual int Unlink(const char *name)
Unlink, i.e. remove, a file.
virtual const char * TempDirectory() const
Return a user configured or systemwide directory to create temporary files in.
This class defines a UUID (Universally Unique IDentifier), also known as a GUID (Globally Unique IDentifier).
void ToHumanReadableSize(value_type bytes, Bool_t si, Double_t *coeff, const char **units)
Return the size expressed in 'human readable' format.
EFromHumanReadableSize FromHumanReadableSize(std::string_view str, T &value)
Convert strings like the following into byte counts 5MB, 5 MB, 5M, 3.7GB, 123b, 456kB,...
@ kUseCompiledDefault
Use the compile-time default setting.