387 unsigned int n =
data.Size();
388 nPoints =
data.Size();
401 Error(
"FitUtil::EvaluateChi2",
"The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
405 double maxResValue = std::numeric_limits<double>::max() /
n;
406 std::vector<double> ones{1., 1., 1., 1.};
407 auto vecSize = vecCore::VectorSize<T>();
409 auto mapFunction = [&](
unsigned int i) {
411 T
x1,
y, invErrorVec;
412 vecCore::Load<T>(
x1,
data.GetCoordComponent(
i * vecSize, 0));
413 vecCore::Load<T>(
y,
data.ValuePtr(
i * vecSize));
414 const auto invError =
data.ErrorPtr(
i * vecSize);
415 auto invErrorptr = (invError !=
nullptr) ? invError : &ones.front();
416 vecCore::Load<T>(invErrorVec, invErrorptr);
420 if(
data.NDim() > 1) {
421 xc.resize(
data.NDim());
423 for (
unsigned int j = 1; j <
data.NDim(); ++j)
424 vecCore::Load<T>(xc[j],
data.GetCoordComponent(
i * vecSize, j));
438 T tmp = (
y - fval) * invErrorVec;
443 auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
445 vecCore::MaskedAssign<T>(chi2,
m, maxResValue);
450 auto redFunction = [](
const std::vector<T> &objs) {
451 return std::accumulate(objs.begin(), objs.end(), T{});
459 Warning(
"FitUtil::EvaluateChi2",
"Multithread execution policy requires IMT, which is disabled. Changing "
460 "to ::ROOT::EExecutionPolicy::kSequential.");
476 Error(
"FitUtil::EvaluateChi2",
"Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
480 if (
data.Size() % vecSize != 0)
481 vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(
data.Size() % vecSize),
482 res + mapFunction(
data.Size() / vecSize));
484 return vecCore::ReduceAdd(res);
488 int iWeight,
bool extended,
unsigned int &nPoints,
492 unsigned int n =
data.Size();
493 nPoints =
data.Size();
496 bool normalizeFunc =
false;
506 if (!normalizeFunc) {
507 if (
data.NDim() == 1) {
509 vecCore::Load<T>(
x,
data.GetCoordComponent(0, 0));
513 std::vector<T>
x(
data.NDim());
514 for (
unsigned int j = 0; j <
data.NDim(); ++j)
515 vecCore::Load<T>(
x[j],
data.GetCoordComponent(0, j));
525 std::vector<double>
xmin(
data.NDim());
526 std::vector<double>
xmax(
data.NDim());
529 if (
data.Range().Size() > 0) {
531 for (
unsigned int ir = 0; ir <
data.Range().Size(); ++ir) {
540 vecCore::Load<T>(xmin_v,
xmin.data());
541 vecCore::Load<T>(xmax_v,
xmax.data());
542 if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
543 MATH_ERROR_MSG(
"FitUtil::EvaluateLogLikelihood",
"A range has not been set and the function is not zero at +/- inf");
552 auto vecSize = vecCore::VectorSize<T>();
553 unsigned int numVectors =
n / vecSize;
555 auto mapFunction = [ &, p](
const unsigned i) {
563 vecCore::Load<T>(
x1,
data.GetCoordComponent(
i * vecSize, 0));
564 const T *
x =
nullptr;
565 unsigned int ndim =
data.NDim();
570 for (
unsigned int j = 1; j < ndim; ++j)
571 vecCore::Load<T>(xc[j],
data.GetCoordComponent(
i * vecSize, j));
584 if (
i < 5 || (
i > numVectors-5) ) {
585 if (ndim == 1) std::cout <<
i <<
" x " <<
x[0] <<
" fval = " << fval;
586 else std::cout <<
i <<
" x " <<
x[0] <<
" y " <<
x[1] <<
" fval = " << fval;
590 if (normalizeFunc) fval = fval * (1 / norm);
596 if (
data.WeightsPtr(
i) ==
nullptr)
599 vecCore::Load<T>(weight,
data.WeightsPtr(
i*vecSize));
606 W2 = weight * weight;
611 if (
i < 5 || (
i > numVectors-5)) {
612 std::cout <<
" " << fval <<
" logfval " << logval << std::endl;
619 auto redFunction = [](
const std::vector<LikelihoodAux<T>> &objs) {
631 Warning(
"FitUtil::EvaluateLogL",
"Multithread execution policy requires IMT, which is disabled. Changing "
632 "to ::ROOT::EExecutionPolicy::kSequential.");
651 Error(
"FitUtil::EvaluateLogL",
"Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
659 unsigned int remainingPoints =
n % vecSize;
660 if (remainingPoints > 0) {
661 auto remainingPointsContribution = mapFunction(numVectors);
663 auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
664 vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
665 vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
666 vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
671 double logl = vecCore::ReduceAdd(logl_v);
672 double sumW = vecCore::ReduceAdd(sumW_v);
673 double sumW2 = vecCore::ReduceAdd(sumW2_v);
677 double extendedTerm = 0;
681 if (!normalizeFunc) {
683 std::vector<double>
xmin(
data.NDim());
684 std::vector<double>
xmax(
data.NDim());
687 if (
data.Range().Size() > 0) {
689 for (
unsigned int ir = 0; ir <
data.Range().Size(); ++ir) {
698 vecCore::Load<T>(xmin_v,
xmin.data());
699 vecCore::Load<T>(xmax_v,
xmax.data());
700 if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
701 MATH_ERROR_MSG(
"FitUtil::EvaluateLogLikelihood",
"A range has not been set and the function is not zero at +/- inf");
710 extendedTerm = - nuTot;
714 extendedTerm = - (sumW2 / sumW) * nuTot;
723 logl += extendedTerm;
727 std::cout <<
"Evaluated log L for parameters (";
728 for (
unsigned int ip = 0; ip < func.
NPar(); ++ip)
729 std::cout <<
" " << p[ip];
730 std::cout <<
") nll = " << -logl << std::endl;
738 int iWeight,
bool extended,
unsigned int,
759 auto vecSize = vecCore::VectorSize<T>();
763 Error(
"FitUtil::EvaluateChi2",
764 "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
765 bool useW2 = (iWeight == 2);
767 auto mapFunction = [&](
unsigned int i) {
769 vecCore::Load<T>(
y,
data.ValuePtr(
i * vecSize));
772 if (
data.NDim() > 1) {
773 std::vector<T>
x(
data.NDim());
774 for (
unsigned int j = 0; j <
data.NDim(); ++j)
775 vecCore::Load<T>(
x[j],
data.GetCoordComponent(
i * vecSize, j));
777 fval = func(
x.data());
779 fval = func(
x.data(), p);
784 vecCore::Load<T>(
x,
data.GetCoordComponent(
i * vecSize, 0));
794 vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
806 vecCore::Load<T>(error,
data.ErrorPtr(
i * vecSize));
808 auto m = vecCore::Mask_v<T>(
y != 0.0);
809 auto weight = vecCore::Blend(
m,(error * error) /
y, T(
data.SumOfError2()/
data.SumOfContent()) );
811 nloglike = weight * ( fval -
y);
820 if (extended) nloglike = fval -
y;
822 vecCore::MaskedAssign<T>(
830 auto redFunction = [](
const std::vector<T> &objs) {
return std::accumulate(objs.begin(), objs.end(), T{}); };
836 Warning(
"FitUtil::Evaluate<T>::EvalPoissonLogL",
837 "Multithread execution policy requires IMT, which is disabled. Changing "
838 "to ::ROOT::EExecutionPolicy::kSequential.");
845 for (
unsigned int i = 0;
i < (
data.Size() / vecSize);
i++) {
846 res += mapFunction(
i);
856 "FitUtil::Evaluate<T>::EvalPoissonLogL",
857 "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
861 if (
data.Size() % vecSize != 0)
862 vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(
data.Size() % vecSize),
863 res + mapFunction(
data.Size() / vecSize));
865 return vecCore::ReduceAdd(res);
870 Error(
"FitUtil::Evaluate<T>::EvalChi2Effective",
"The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
877 static vecCore::Mask<T> CheckInfNaNValues(T &rval)
879 auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
882 vecCore::MaskedAssign(rval, !
mask, +vecCore::NumericLimits<T>::Max());
885 vecCore::MaskedAssign(rval, !
mask && rval < 0, -vecCore::NumericLimits<T>::Max());
891 unsigned int &nPoints,
893 unsigned nChunks = 0)
901 if (
data.HaveCoordErrors()) {
903 "Error on the coordinates are not used in calculating Chi2 gradient");
908 assert(fg !=
nullptr);
914 Error(
"FitUtil::EvaluateChi2Gradient",
"The vectorized implementation doesn't support Integrals,"
915 "BinVolume or ExpErrors\n. Aborting operation.");
917 unsigned int npar = func.
NPar();
918 auto vecSize = vecCore::VectorSize<T>();
919 unsigned initialNPoints =
data.Size();
920 unsigned numVectors = initialNPoints / vecSize;
923 std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
925 auto mapFunction = [&](
const unsigned int i) {
927 std::vector<T> gradFunc(npar);
928 std::vector<T> pointContributionVec(npar);
932 vecCore::Load<T>(
x1,
data.GetCoordComponent(
i * vecSize, 0));
933 vecCore::Load<T>(
y,
data.ValuePtr(
i * vecSize));
934 const auto invErrorPtr =
data.ErrorPtr(
i * vecSize);
936 if (invErrorPtr ==
nullptr)
939 vecCore::Load<T>(invError, invErrorPtr);
945 const T *
x =
nullptr;
947 unsigned int ndim =
data.NDim();
954 for (
unsigned int j = 1; j < ndim; ++j)
955 vecCore::Load<T>(xc[j],
data.GetCoordComponent(
i * vecSize, j));
964 validPointsMasks[
i] = CheckInfNaNValues(fval);
965 if (vecCore::MaskEmpty(validPointsMasks[
i])) {
967 return pointContributionVec;
971 for (
unsigned int ipar = 0; ipar < npar; ++ipar) {
974 validPointsMasks[
i] = CheckInfNaNValues(gradFunc[ipar]);
976 if (vecCore::MaskEmpty(validPointsMasks[
i])) {
981 vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[
i],
982 -2.0 * (
y - fval) * invError * invError * gradFunc[ipar]);
985 return pointContributionVec;
989 auto redFunction = [&](
const std::vector<std::vector<T>> &partialResults) {
990 std::vector<T>
result(npar);
992 for (
auto const &pointContributionVec : partialResults) {
993 for (
unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
994 result[parameterIndex] += pointContributionVec[parameterIndex];
1000 std::vector<T> gVec(npar);
1001 std::vector<double>
g(npar);
1009 Warning(
"FitUtil::EvaluateChi2Gradient",
1010 "Multithread execution policy requires IMT, which is disabled. Changing "
1011 "to ::ROOT::EExecutionPolicy::kSequential.");
1029 "FitUtil::EvaluateChi2Gradient",
1030 "Execution policy unknown. Available choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
1034 unsigned int remainingPoints = initialNPoints % vecSize;
1035 if (remainingPoints > 0) {
1036 auto remainingPointsContribution = mapFunction(numVectors);
1038 auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1039 for (
unsigned int param = 0; param < npar; param++) {
1040 vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1044 for (
unsigned int param = 0; param < npar; param++) {
1045 grad[param] = vecCore::ReduceAdd(gVec[param]);
1049 nPoints = initialNPoints;
1051 if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
1052 [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
1053 unsigned nRejected = 0;
1055 for (
const auto &
mask : validPointsMasks) {
1056 for (
unsigned int i = 0;
i < vecSize;
i++) {
1057 nRejected += !vecCore::Get(
mask,
i);
1061 assert(nRejected <= initialNPoints);
1062 nPoints = initialNPoints - nRejected;
1064 if (nPoints < npar) {
1066 "Too many points rejected for overflow in gradient calculation");
1073 Error(
"FitUtil::Evaluate<T>::EvalChi2Residual",
"The vectorized evaluation of the Chi2 with the ith residual is still not supported");
1080 Error(
"FitUtil::Evaluate<T>::EvaluatePoissonBinPdf",
"The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
1085 Error(
"FitUtil::Evaluate<T>::EvalPdf",
"The vectorized evaluation of the LogLikelihood fit evaluated point by point is still not supported");
1103 unsigned nChunks = 0)
1108 assert(fg !=
nullptr);
1117 Error(
"FitUtil::EvaluatePoissonLogLGradient",
"The vectorized implementation doesn't support Integrals,"
1118 "BinVolume or ExpErrors\n. Aborting operation.");
1120 unsigned int npar = func.
NPar();
1121 auto vecSize = vecCore::VectorSize<T>();
1122 unsigned initialNPoints =
data.Size();
1123 unsigned numVectors = initialNPoints / vecSize;
1125 auto mapFunction = [&](
const unsigned int i) {
1127 std::vector<T> gradFunc(npar);
1128 std::vector<T> pointContributionVec(npar);
1132 vecCore::Load<T>(
x1,
data.GetCoordComponent(
i * vecSize, 0));
1133 vecCore::Load<T>(
y,
data.ValuePtr(
i * vecSize));
1137 const T *
x =
nullptr;
1139 unsigned ndim =
data.NDim();
1144 for (
unsigned int j = 1; j < ndim; ++j)
1145 vecCore::Load<T>(xc[j],
data.GetCoordComponent(
i * vecSize, j));
1155 for (
unsigned int ipar = 0; ipar < npar; ++ipar) {
1156 vecCore::Mask<T> positiveValuesMask = fval > 0;
1159 vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. -
y / fval));
1161 vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
1163 if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
1164 const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1165 const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1166 T gg = kdmax1 * gradFunc[ipar];
1167 pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
1174 if (
i < 5 || (
i >
data.Size()-5) ) {
1175 if (
data.NDim() > 1) std::cout <<
i <<
" x " <<
x[0] <<
" y " <<
x[1];
1176 else std::cout <<
i <<
" x " <<
x[0];
1177 std::cout <<
" func " << fval <<
" gradient ";
1178 for (
unsigned int ii = 0; ii < npar; ++ii) std::cout <<
" " << pointContributionVec[ii];
1184 return pointContributionVec;
1188 auto redFunction = [&](
const std::vector<std::vector<T>> &partialResults) {
1189 std::vector<T>
result(npar);
1191 for (
auto const &pointContributionVec : partialResults) {
1192 for (
unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1193 result[parameterIndex] += pointContributionVec[parameterIndex];
1199 std::vector<T> gVec(npar);
1207 Warning(
"FitUtil::EvaluatePoissonLogLGradient",
1208 "Multithread execution policy requires IMT, which is disabled. Changing "
1209 "to ::ROOT::EExecutionPolicy::kSequential.");
1226 Error(
"FitUtil::EvaluatePoissonLogLGradient",
"Execution policy unknown. Available choices:\n "
1227 "::ROOT::EExecutionPolicy::kSequential (default)\n "
1228 "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1233 unsigned int remainingPoints = initialNPoints % vecSize;
1234 if (remainingPoints > 0) {
1235 auto remainingPointsContribution = mapFunction(numVectors);
1237 auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1238 for (
unsigned int param = 0; param < npar; param++) {
1239 vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1243 for (
unsigned int param = 0; param < npar; param++) {
1244 grad[param] = vecCore::ReduceAdd(gVec[param]);
1248 std::cout <<
"***** Final gradient : ";
1249 for (
unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] <<
" ";
1256 double *grad,
unsigned int &,
1258 unsigned nChunks = 0)
1263 assert(fg !=
nullptr);
1268 unsigned int npar = func.
NPar();
1269 auto vecSize = vecCore::VectorSize<T>();
1270 unsigned initialNPoints =
data.Size();
1271 unsigned numVectors = initialNPoints / vecSize;
1274 std::cout <<
"\n===> Evaluate Gradient for parameters ";
1275 for (
unsigned int ip = 0; ip < npar; ++ip)
1276 std::cout <<
" " << p[ip];
1282 const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1283 const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1285 auto mapFunction = [&](
const unsigned int i) {
1286 std::vector<T> gradFunc(npar);
1287 std::vector<T> pointContributionVec(npar);
1290 vecCore::Load<T>(
x1,
data.GetCoordComponent(
i * vecSize, 0));
1292 const T *
x =
nullptr;
1294 unsigned int ndim =
data.NDim();
1295 std::vector<T> xc(ndim);
1299 for (
unsigned int j = 1; j < ndim; ++j)
1300 vecCore::Load<T>(xc[j],
data.GetCoordComponent(
i * vecSize, j));
1307 T fval = func(
x, p);
1311 if (
i < 5 || (
i > numVectors-5) ) {
1312 if (ndim > 1) std::cout <<
i <<
" x " <<
x[0] <<
" y " <<
x[1] <<
" gradient " << gradFunc[0] <<
" " << gradFunc[1] <<
" " << gradFunc[3] << std::endl;
1313 else std::cout <<
i <<
" x " <<
x[0] <<
" gradient " << gradFunc[0] <<
" " << gradFunc[1] <<
" " << gradFunc[3] << std::endl;
1317 vecCore::Mask<T> positiveValues = fval > 0;
1319 for (
unsigned int kpar = 0; kpar < npar; ++kpar) {
1320 if (!vecCore::MaskEmpty(positiveValues))
1321 vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
1323 vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
1324 if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
1325 T gg = kdmax1 * gradFunc[kpar];
1326 pointContributionVec[kpar] =
1327 vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2),
1328 -vecCore::math::Max(gg, -kdmax2));
1333 return pointContributionVec;
1337 auto redFunction = [&](
const std::vector<std::vector<T>> &pointContributions) {
1338 std::vector<T>
result(npar);
1340 for (
auto const &pointContributionVec : pointContributions) {
1341 for (
unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1342 result[parameterIndex] += pointContributionVec[parameterIndex];
1348 std::vector<T> gVec(npar);
1349 std::vector<double>
g(npar);
1357 Warning(
"FitUtil::EvaluateLogLGradient",
1358 "Multithread execution policy requires IMT, which is disabled. Changing "
1359 "to ::ROOT::EExecutionPolicy::kSequential.");
1376 Error(
"FitUtil::EvaluateLogLGradient",
"Execution policy unknown. Available choices:\n "
1377 "::ROOT::EExecutionPolicy::kSequential (default)\n "
1378 "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1382 unsigned int remainingPoints = initialNPoints % vecSize;
1383 if (remainingPoints > 0) {
1384 auto remainingPointsContribution = mapFunction(numVectors);
1386 auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
1387 for (
unsigned int param = 0; param < npar; param++) {
1388 vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1392 for (
unsigned int param = 0; param < npar; param++) {
1393 grad[param] = vecCore::ReduceAdd(gVec[param]);
1397 std::cout <<
"Final gradient ";
1398 for (
unsigned int param = 0; param < npar; param++) {
1399 std::cout <<
" " << grad[param];
1425 int iWeight,
bool extended,
unsigned int &nPoints,
1432 int iWeight,
bool extended,
unsigned int &nPoints,
1443 double *
g,
unsigned int &nPoints,
1445 unsigned nChunks = 0)
1451 bool hasGrad,
bool fullHessian)
1468 unsigned int &nPoints,
1470 unsigned nChunks = 0)
1476 double *
g,
unsigned int &nPoints,
1478 unsigned nChunks = 0)