// @(#)root/base:$Id: TPRegexp.cxx 26723 2008-12-08 10:36:31Z rdm $
// Author: Eddy Offermann   24/06/05

/*************************************************************************
 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// TPRegexp                                                             //
//                                                                      //
// C++ Wrapper for the "Perl Compatible Regular Expressions" library    //
//  The PCRE lib can be found at:                                       //
//              http://www.pcre.org/                                    //
//                                                                      //
// Extensive documentation about Regular expressions in Perl can be     //
// found at :                                                           //
//              http://perldoc.perl.org/perlre.html                     //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#include "Riostream.h"
#include "TPRegexp.h"
#include "TObjArray.h"
#include "TObjString.h"
#include "TError.h"

#include <pcre.h>

#include <vector>

struct PCREPriv_t {
   pcre       *fPCRE;
   pcre_extra *fPCREExtra;

   PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
};


ClassImp(TPRegexp)

//______________________________________________________________________________
TPRegexp::TPRegexp() :
   fPriv(new PCREPriv_t),
   fPCREOpts(0)
{
   // Default constructor: empty pattern, no options set and nothing
   // compiled yet.
}

//______________________________________________________________________________
TPRegexp::TPRegexp(const TString &pat) :
   fPattern(pat),
   fPriv(new PCREPriv_t),
   fPCREOpts(0)
{
   // Construct from the pattern string pat. No options are set and the
   // pattern is not compiled here.
}

//______________________________________________________________________________
TPRegexp::TPRegexp(const TPRegexp &p) :
   fPattern(p.fPattern),
   fPriv(new PCREPriv_t),
   fPCREOpts(p.fPCREOpts)
{
   // Copy constructor. Pattern and options are taken from p; the compiled
   // PCRE data is not shared, the copy gets its own (still empty) holder.
}

//______________________________________________________________________________
TPRegexp::~TPRegexp()
{
   // Destructor: release the compiled pattern and the study data (when
   // present) and then the private holder itself.

   pcre       *compiled = fPriv->fPCRE;
   pcre_extra *studied  = fPriv->fPCREExtra;

   if (compiled != 0)
      pcre_free(compiled);
   if (studied != 0)
      pcre_free(studied);

   delete fPriv;
}

//______________________________________________________________________________
TPRegexp &TPRegexp::operator=(const TPRegexp &p)
{
   // Assignment operator. Pattern and options are copied from p; any
   // compiled pattern or study data held by this object is released and
   // the handles are cleared.

   if (this == &p)
      return *this;   // self-assignment, nothing to do

   fPattern = p.fPattern;

   if (fPriv->fPCRE) {
      pcre_free(fPriv->fPCRE);
      fPriv->fPCRE = 0;
   }
   if (fPriv->fPCREExtra) {
      pcre_free(fPriv->fPCREExtra);
      fPriv->fPCREExtra = 0;
   }

   fPCREOpts = p.fPCREOpts;
   return *this;
}

//______________________________________________________________________________
UInt_t TPRegexp::ParseMods(const TString &modStr) const
{
   // Convert a string of Perl-style modifier characters into the
   // corresponding option bits:
   //   g, o, d   -> kPCRE_GLOBAL, kPCRE_OPTIMIZE, kPCRE_DEBUG_MSGS
   //   i, m, s, x -> PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, PCRE_EXTENDED
   // ('d' enables debug printing and is not Perl compatible).
   //
   // An empty modStr returns the currently stored fPCREOpts unchanged.
   // An unknown character is reported through Error() and clears the bits
   // collected so far; parsing then continues with the next character.

   if (modStr.Length() <= 0)
      return fPCREOpts;

   UInt_t flags = 0;
   for (const char *c = modStr.Data(); *c; ++c) {
      switch (*c) {
         case 'g': flags |= kPCRE_GLOBAL;     break;
         case 'i': flags |= PCRE_CASELESS;    break;
         case 'm': flags |= PCRE_MULTILINE;   break;
         case 'o': flags |= kPCRE_OPTIMIZE;   break;
         case 's': flags |= PCRE_DOTALL;      break;
         case 'x': flags |= PCRE_EXTENDED;    break;
         case 'd': flags |= kPCRE_DEBUG_MSGS; break;
         default:
            Error("ParseMods", "illegal pattern modifier: %c", *c);
            flags = 0;
            break;
      }
   }
   return flags;
}

//______________________________________________________________________________
TString TPRegexp::GetModifiers() const
{
   // Build the modifier string that corresponds to the option bits stored
   // in fPCREOpts. Characters are emitted in the fixed order "gimsxod".

   const UInt_t bits = fPCREOpts;
   TString mods;

   if (bits & kPCRE_GLOBAL)
      mods += 'g';
   if (bits & PCRE_CASELESS)
      mods += 'i';
   if (bits & PCRE_MULTILINE)
      mods += 'm';
   if (bits & PCRE_DOTALL)
      mods += 's';
   if (bits & PCRE_EXTENDED)
      mods += 'x';
   if (bits & kPCRE_OPTIMIZE)
      mods += 'o';
   if (bits & kPCRE_DEBUG_MSGS)
      mods += 'd';

   return mods;
}

//______________________________________________________________________________
void TPRegexp::Compile()
{
   // Compile fPattern into fPriv->fPCRE, releasing a previously compiled
   // pattern first. Only the bits of fPCREOpts selected by kPCRE_INTMASK
   // are handed to pcre_compile().
   //
   // On failure the problem is reported through Error(), fPriv->fPCRE is
   // left at 0 and the pattern is not studied. On success the pattern is
   // also sent through Optimize() when study data already exists or when
   // kPCRE_OPTIMIZE is set.

   if (fPriv->fPCRE) {
      pcre_free(fPriv->fPCRE);
      fPriv->fPCRE = 0;
   }

   if (fPCREOpts & kPCRE_DEBUG_MSGS)
      Info("Compile", "PREGEX compiling %s", fPattern.Data());

   const char *errstr = 0;
   Int_t patIndex = 0;
   fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
                               &errstr, &patIndex, 0);

   if (!fPriv->fPCRE) {
      Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
            fPattern.Data(), patIndex, errstr);
      // There is no compiled pattern to study; calling Optimize() here
      // would only produce a second, misleading error message.
      return;
   }

   if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
      Optimize();
}

//______________________________________________________________________________
void TPRegexp::Optimize()
{
   // Send the pattern through the optimizer.

   if (fPriv->fPCREExtra)
      pcre_free(fPriv->fPCREExtra);

   if (fPCREOpts & kPCRE_DEBUG_MSGS)
      Info("Optimize", "PREGEX studying %s", fPattern.Data());

   const char *errstr;
   // pcre_study allows less options - see pcre_internal.h PUBLIC_STUDY_OPTIONS.
   fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);

   if (!fPriv->fPCREExtra && errstr) {
      Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
            fPattern.Data(), errstr);
   }
}

//______________________________________________________________________________
Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
                            const TString &replacePattern,
                            Int_t *offVec, Int_t nrMatch) const
{
   // Append replacePattern to final, expanding '$' constructs on the way.
   //
   //  s              - subject string the offsets in offVec refer to
   //  final          - output string; text is only appended to it
   //  replacePattern - replacement text; "$<digits>" inserts the sub-string
   //                   with that number, "$&" inserts sub-string 0
   //  offVec         - offset pairs; entries 2*i and 2*i+1 are used as
   //                   begin and end of sub-string i
   //  nrMatch        - number of sub-strings available in offVec
   //
   // A '$' followed by anything else is reported through Error(); a
   // sub-string number outside [0, nrMatch-1] is reported and skipped.
   // Returns the number of expanded '$' constructs.

   Int_t nrSubs = 0;
   const char *p = replacePattern;

   // state  0: copying literal characters
   // state  1: collecting the digits of a sub-string number
   // state -1: end of the replacement pattern reached
   Int_t state = 0;
   Int_t subnum = 0;
   while (state != -1) {
      switch (state) {
         case 0:
            if (!*p) {
               state = -1;
               break;
            }
            if (*p == '$') {
               state = 1;
               subnum = 0;
               // The <cctype> functions require a value representable as
               // unsigned char; a plain (possibly negative) char is
               // undefined behaviour, hence the casts.
               if (p[1] == '&') {
                  p++;
                  // a single digit directly after "$&" is skipped
                  if (isdigit(static_cast<unsigned char>(p[1])))
                     p++;
               } else if (!isdigit(static_cast<unsigned char>(p[1]))) {
                  Error("ReplaceSubs", "badly formed replacement pattern: %s",
                        replacePattern.Data());
               }
            } else
               final += *p;
            break;
         case 1:
            if (isdigit(static_cast<unsigned char>(*p))) {
               subnum *= 10;
               subnum += (*p)-'0';
            } else {
               // First non-digit (or the terminating 0) ends the number.
               if (fPCREOpts & kPCRE_DEBUG_MSGS)
                  Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
               if (subnum < 0 || subnum > nrMatch-1) {
                  Error("ReplaceSubs","bad string number: %d",subnum);
               } else {
                  const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
                  final += subStr;
                  nrSubs++;
               }
               state = 0;
               continue;  // send char to start state
            }
      }
      p++;
   }
   return nrSubs;
}

//______________________________________________________________________________
Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
                              Int_t nMaxMatch, TArrayI *pos)
{
   // Perform the actual matching - protected method.
   //
   // Runs pcre_exec() on s beginning at offset start, with room for
   // 3*nMaxMatch offsets. Returns the number of matched (sub-)strings;
   // 0 is returned both when nothing matched and when pcre_exec() reported
   // an error (the latter is also sent to Error()). When pos is given it
   // receives the 2*n begin/end offsets, except in the error case where
   // it is left untouched.

   const Int_t vecSize = 3*nMaxMatch;
   Int_t *offsets = new Int_t[vecSize];

   // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
   const Int_t rc = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                              s.Length(), start, 0,
                              offsets, vecSize);

   Int_t nFound = 0;
   if (rc > 0 || rc == PCRE_ERROR_NOMATCH) {
      if (rc > 0)
         nFound = rc;
      if (pos)
         pos->Set(2*nFound, offsets);
   } else {
      Error("Match","pcre_exec error = %d", rc);
   }

   delete [] offsets;
   return nFound;
}

//______________________________________________________________________________
Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
                      Int_t nMaxMatch, TArrayI *pos)
{
   // Match the pattern against s, starting at offset start.
   // Returns the number of matches: the full match plus the sub-pattern
   // matches. nMaxMatch is the maximum allowed number of matches.
   // When pos is given it receives the string indices of the matches;
   // MatchS shows how they are used.
   //
   // The pattern is (re)compiled when nothing is compiled yet or when
   // mods translate to options different from the stored ones.

   const UInt_t wanted = ParseMods(mods);

   const Bool_t mustCompile = (fPriv->fPCRE == 0) || (wanted != fPCREOpts);
   if (mustCompile) {
      fPCREOpts = wanted;
      Compile();
   }

   return MatchInternal(s, start, nMaxMatch, pos);
}


//______________________________________________________________________________
TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
                            Int_t start, Int_t nMaxMatch)
{
   // Match s and return the matched substrings as TObjString's in a newly
   // allocated TObjArray, which owns its entries. Entry 0 is the full
   // match, the following entries are the sub-patterns. A sub-pattern
   // that did not take part in the match yields an empty string:
   //
   // TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
   // for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
   //    const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
   //    cout << "\"" << subStr << "\" ";
   // }
   // cout << endl;
   //
   // produces:  "abc" "a" "" "bc"

   TArrayI pos;
   const Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);

   TObjArray *result = new TObjArray();
   result->SetOwner();

   for (Int_t k = 0; k < nrMatch; ++k) {
      const Int_t from = pos[2*k];
      const Int_t to   = pos[2*k+1];

      TObjString *entry = 0;
      if (from < 0 || to < 0) {
         // negative offsets: this sub-pattern was not matched
         entry = new TObjString();
      } else {
         const TString piece = s(from, to-from);
         entry = new TObjString(piece);
      }
      result->Add(entry);
   }

   return result;
}

//______________________________________________________________________________
Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
                                   Int_t start, Int_t nMaxMatch,
                                   Bool_t doDollarSubst)
{
   // Perform pattern substitution with optional back-ref replacement
   // - protected method.
   //
   // Searches s from offset start and replaces the match by
   // replacePattern; with kPCRE_GLOBAL set every following match is
   // replaced as well. When doDollarSubst is true the replacement goes
   // through ReplaceSubs(), otherwise it is inserted literally.
   // s is overwritten with the result; the number of replaced matches is
   // returned.

   const Int_t vecSize = 3*nMaxMatch;
   Int_t *offVec = new Int_t[vecSize];

   TString result;                 // the new string being assembled
   Int_t   nDone      = 0;         // matches replaced so far
   Int_t   searchFrom = start;     // where the next pcre_exec starts
   Int_t   copiedUpTo = 0;         // part of s already dealt with
   const Bool_t global = (fPCREOpts & kPCRE_GLOBAL) != 0;

   for (;;) {
      // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
      const Int_t rc = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                                 s.Length(), searchFrom, 0,
                                 offVec, vecSize);

      if (rc == PCRE_ERROR_NOMATCH)
         break;
      if (rc <= 0) {
         Error("Substitute", "pcre_exec error = %d", rc);
         break;
      }

      const Int_t mBegin = offVec[0];
      const Int_t mEnd   = offVec[1];

      // carry over the untouched text in front of this match
      if (copiedUpTo <= mBegin) {
         result += s(copiedUpTo, mBegin - copiedUpTo);
         copiedUpTo = mEnd;
      }

      // emit the replacement
      if (doDollarSubst)
         ReplaceSubs(s, result, replacePattern, offVec, rc);
      else
         result += replacePattern;
      ++nDone;

      // without the global flag a single replacement is all we do
      if (!global)
         break;

      if (mBegin != mEnd) {
         searchFrom = mEnd;
         continue;
      }

      // empty match: step one character ahead, unless we are at the end
      if (mEnd == s.Length())
         break;
      searchFrom = mEnd + 1;
   }

   delete [] offVec;

   result += s(copiedUpTo, s.Length() - copiedUpTo);
   s = result;

   return nDone;
}

//______________________________________________________________________________
Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
                           const TString &mods, Int_t start, Int_t nMaxMatch)
{
   // Replace in s the part matching the pattern by replacePattern, in
   // which "$N" back-references are expanded. s is modified in place and
   // the count reported by SubstituteInternal() (one per replaced match)
   // is returned.
   //
   // TString s("aap noot mies");
   // TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
   // cout << "\"" << s << "\"" <<endl;
   //
   // produces: "mies noot aap"
   //
   // The pattern is (re)compiled when nothing is compiled yet or when
   // mods translate to options different from the stored ones.

   const UInt_t wanted = ParseMods(mods);

   const Bool_t mustCompile = (fPriv->fPCRE == 0) || (wanted != fPCREOpts);
   if (mustCompile) {
      fPCREOpts = wanted;
      Compile();
   }

   return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
}


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// TString member functions, put here so the linker will include        //
// them only if regular expressions are used.                           //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

//______________________________________________________________________________
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
{
   // Return the position of the first occurrence of the regexp in this
   // string, searching from offset start; -1 when there is no match.

   TArrayI pos;
   const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
   return (nrMatch > 0) ? pos[0] : -1;
}

//______________________________________________________________________________
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
{
   // Return the position of the first occurrence of the regexp in this
   // string, searching from offset start, and store the length of the
   // matched text in *extent. Without a match -1 is returned and *extent
   // is set to 0.

   TArrayI pos;
   if (r.Match(*this,"",start,10,&pos) <= 0) {
      *extent = 0;
      return -1;
   }

   *extent = pos[1]-pos[0];
   return pos[0];
}

//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
{
   // Apply the regexp from offset start on and return the matched part
   // as a substring of this string.

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}

//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r)
{
   // Return the substring found by applying the regexp, searching from
   // the beginning of the string (forwards to the two-argument overload
   // with start = 0).

   return (*this)(r,0);
}

//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
{
   // Apply the regexp from offset start on and return the matched part
   // as a substring of this string (const version).

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}

//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r) const
{
   // Return the substring found by applying the regexp, searching from
   // the beginning of the string (const version; forwards to the
   // two-argument overload with start = 0).

   return (*this)(r, 0);
}


//////////////////////////////////////////////////////////////////////////
// TPMERegexp
//////////////////////////////////////////////////////////////////////////

//______________________________________________________________________________
//
// Wrapper for PCRE library (Perl Compatible Regular Expressions).
// Based on PME - PCRE Made Easy by Zachary Hansen.
//
// Supports main Perl operations using regular expressions (Match,
// Substitute and Split). To retrieve the results one can simply use
// operator[] returning a TString.
//
// See $ROOTSYS/tutorials/regexp_pme.C for examples.

ClassImp(TPMERegexp);

//______________________________________________________________________________
TPMERegexp::TPMERegexp()
   : TPRegexp(),
     fNMaxMatches(10),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Default constructor. The empty pattern is compiled right away, so
   // this regexp will match an empty string. Up to 10 matches are kept.

   Compile();
}

//______________________________________________________________________________
TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Constructor taking the options as perl-style modifier characters:
   //  s         - string to compile into regular expression
   //  opts      - perl-style character flags to be set on TPME object
   //  nMatchMax - maximum number of matches to keep
   // The pattern is compiled immediately.

   fPCREOpts = ParseMods(opts);
   Compile();
}

//______________________________________________________________________________
TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Constructor taking the options as an already assembled bit mask:
   //  s         - string to compile into regular expression
   //  opts      - PCRE-style option flags to be set on TPME object
   //  nMatchMax - maximum number of matches to keep
   // The pattern is compiled immediately.

   fPCREOpts = opts;
   Compile();
}

//______________________________________________________________________________
TPMERegexp::TPMERegexp(const TPMERegexp& r)
   : TPRegexp(r),
     fNMaxMatches(r.fNMaxMatches),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Copy constructor.
   // Only the PCRE specifics (pattern, options, maximum number of
   // matches) are copied; last-match and global-match information start
   // out cleared. The pattern is compiled for the new object.

   Compile();
}

//______________________________________________________________________________
void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
{
   // Reset the patteren and options.
   // If 'nMatchMax' other than -1 (the default) is passed, it is also set.

   Reset(s, ParseMods(opts), nMatchMax);
}

//______________________________________________________________________________
void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
{
   // Reset the patteren and options.
   // If 'nMatchMax' other than -1 (the default) is passed, it is also set.

   fPattern = s;
   fPCREOpts = opts;
   Compile();

   if (nMatchMax != -1)
      fNMatches = nMatchMax;
   fNMatches = 0;
   fLastGlobalPosition = 0;   
}

//______________________________________________________________________________
void TPMERegexp::AssignGlobalState(const TPMERegexp& re)
{
   // Copy global-match state from 're; so that this regexp can continue
   // parsing the string from where 're' left off.
   //
   // Alternatively, GetGlobalPosition() get be used to retrieve the
   // last match position so that it can passed to Match().
   //
   // Ideally, as it is done in PERL, the last match position would be
   // stored in the TString itself.

   fLastStringMatched  = re.fLastStringMatched;
   fLastGlobalPosition = re.fLastGlobalPosition;
}

//______________________________________________________________________________
void TPMERegexp::ResetGlobalState()
{
   // Reset state of global match, i.e. set the remembered position of
   // the last global match back to 0.
   // This happens automatically when a new string is passed for matching.
   // But be careful, as the address of the last TString object is used
   // to make this decision.

   fLastGlobalPosition = 0;
}

//______________________________________________________________________________
Int_t TPMERegexp::Match(const TString& s, UInt_t start)
{
   // Runs a match on s against the regex 'this' was created with.
   //
   // Args:
   //  s        - string to match against
   //  start    - offset at which to start matching
   // Returns:  - number of matches found
   //
   // With kPCRE_GLOBAL set, the end position of the previous match on the
   // same string object is added to start, and the end of this match is
   // remembered for the next call. The results are kept in fMarkers and
   // can be retrieved with operator[].

   // If we got a new string, reset the global position counter.
   // The decision is based on the address of the TString object.
   if (fAddressOfLastString != (void*) &s)
      fLastGlobalPosition = 0;

   const Bool_t global = (fPCREOpts & kPCRE_GLOBAL) != 0;
   if (global)
      start += fLastGlobalPosition;

   fNMatches = MatchInternal(s, start, fNMaxMatches, &fMarkers);

   fLastStringMatched   = s;
   fAddressOfLastString = (void*) &s;

   if (global) {
      // MatchInternal() never returns a negative value: "no match" and
      // errors both come back as 0. In that case the position is reset
      // for the next match (perl does this); otherwise it is set to the
      // end of the match.
      fLastGlobalPosition = (fNMatches > 0) ? fMarkers[1] : 0;
   }

   return fNMatches;
}

//______________________________________________________________________________
Int_t TPMERegexp::Split(const TString& s, Int_t maxfields)
{
   // Splits s into at most maxfields fields, using this regexp as the
   // separator. If maxfields is unspecified or 0, trailing empty matches
   // are discarded. If maxfields is positive, no more than maxfields
   // fields will be returned and trailing empty matches are preserved.
   // If maxfields is negative, all fields (including trailing empty
   // ones) are returned. This *should* be the same as the perl behaviour.
   //
   // If pattern produces sub-matches, these are also stored in
   // the result.
   //
   // A pattern matching the null string will split the value of EXPR
   // into separate characters at each point it matches that way.
   //
   // The resulting fields replace the contents of fMarkers / fNMatches,
   // so they can be retrieved with operator[] afterwards.
   //
   // Args:
   //  s         - string to split
   //  maxfields - maximum number of fields to be split out.  0 means
   //              split all fields, but discard any trailing empty bits.
   //              Negative means split all fields and keep trailing empty bits.
   //              Positive means keep up to N fields including any empty fields
   //              less than N. Anything remaining is in the last field.
   // Returns:   - number of fields found

   // a marker is a (begin, end) pair of offsets into s
   typedef std::pair<int, int>   Marker_t;
   typedef std::vector<Marker_t> MarkerVec_t;

   // stores the marks for the split
   MarkerVec_t oMarks;

   // this is a list of current trailing empty matches if maxfields is
   //   unspecified or 0.  If there is stuff in it and a non-empty match
   //   is found, then everything in here is pushed into oMarks and then
   //   the new match is pushed on.  If the end of the string is reached
   //   and there are empty matches in here, they are discarded.
   MarkerVec_t oCurrentTrailingEmpties;

   Int_t nOffset = 0;        // begin of the field currently being collected
   Int_t nMatchesFound = 0;  // number of separator matches processed

   // while we are still finding matches and maxfields is 0 or negative
   //   (meaning we get all matches), or we haven't gotten to the number
   //   of specified matches
   Int_t matchRes;
   while ((matchRes = Match(s, nOffset)) &&
          ((maxfields < 1) || nMatchesFound < maxfields)) {
      ++nMatchesFound;

      // zero-length separator match: emit the single character at nOffset
      // as a field and move on by one character
      if (fMarkers[1] - fMarkers[0] == 0) {
         oMarks.push_back(Marker_t(nOffset, nOffset + 1));
         ++nOffset;
         if (nOffset >= s.Length())
            break;
         else
            continue;
      }

      // is there text between the field start and the separator?
      if (nOffset != fMarkers[0]) {
         // non-empty field: empties held back so far are no longer trailing
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
            oCurrentTrailingEmpties.clear();
         }
         oMarks.push_back(Marker_t(nOffset, fMarkers[0]));
      } else {
         // empty field (separator starts right at the field start)
         if (maxfields == 0) {
            // store for possible later inclusion
            oCurrentTrailingEmpties.push_back(Marker_t(nOffset, nOffset));
         } else {
            oMarks.push_back(Marker_t(nOffset, nOffset));
         }
      }

      // next field starts right after the separator
      nOffset = fMarkers[1];

      // sub-matches of the separator pattern are stored as fields as well
      if (matchRes > 1) {
         for (Int_t i = 1; i < matchRes; ++i)
            oMarks.push_back(Marker_t(fMarkers[2*i], fMarkers[2*i + 1]));
      }
   }


   // if there were no matches found, push the whole thing on
   if (nMatchesFound == 0) {
      oMarks.push_back(Marker_t(0, s.Length()));
   }
   // if we ran out of matches, then append the rest of the string
   //   onto the end of the last split field
   else if (maxfields > 0 && nMatchesFound >= maxfields) {
      oMarks[oMarks.size() - 1].second = s.Length();
   }
   // else we have to add another entry for the end of the string
   else {
      Bool_t last_empty = (nOffset == s.Length());
      // an empty last field is only kept when maxfields is negative
      if (!last_empty || maxfields < 0) {
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
         }
         oMarks.push_back(Marker_t(nOffset, s.Length()));
      }
   }

   // publish the collected fields through fMarkers / fNMatches
   fNMatches = oMarks.size();
   fMarkers.Set(2*fNMatches);
   for (Int_t i = 0; i < fNMatches; ++i) {
      fMarkers[2*i]     = oMarks[i].first;
      fMarkers[2*i + 1] = oMarks[i].second;
   }

   // fprintf(stderr, "match returning %d\n", fNMatches);
   return fNMatches;
}

//______________________________________________________________________________
Int_t TPMERegexp::Substitute(TString& s, const TString& r, Bool_t doDollarSubst)
{
   // Substitute matching part of s with r, dollar back-ref
   // substitution is performed if doDollarSubst is true (default).
   // Returns the number of substitutions made.
   //
   // After the substitution, another pass is made over the resulting
   // string and the following special tokens are interpreted:
   // \l - lowercase next char,
   // \u - uppercase next char,
   // \L - lowercase till \E,
   // \U - uppercase till \E, and
   // \E - end case modification.
   // Any other backslash sequence is copied through unchanged.

   Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);

   // state: 0 - copy, 1 - lowercase next, 2 - uppercase next,
   //        3 - lowercase till \E, 4 - uppercase till \E
   TString ret;
   Int_t   state = 0;
   Ssiz_t  pos = 0, len = s.Length();
   const Char_t *data = s.Data();
   while (pos < len) {
      Char_t c = data[pos];
      if (c == '\\') {
         c = data[pos+1]; // Rely on string-data being null-terminated.
         switch (c) {
            case  0 : ret += '\\'; break;   // lone backslash at the end
            case 'l': state = 1;   break;
            case 'u': state = 2;   break;
            case 'L': state = 3;   break;
            case 'U': state = 4;   break;
            case 'E': state = 0;   break;
            default : ret += '\\'; ret += c; break;
         }
         pos += 2;
      } else {
         // tolower/toupper require a value representable as unsigned char;
         // passing a negative plain char is undefined behaviour.
         const unsigned char uc = static_cast<unsigned char>(c);
         switch (state) {
            case 0:  ret += c; break;
            case 1:  ret += (Char_t) tolower(uc); state = 0; break;
            case 2:  ret += (Char_t) toupper(uc); state = 0; break;
            case 3:  ret += (Char_t) tolower(uc); break;
            case 4:  ret += (Char_t) toupper(uc); break;
            default: Error("TPMERegexp::Substitute", "invalid state.");
         }
         ++pos;
      }
   }

   s = ret;

   return cnt;
}

//______________________________________________________________________________
TString TPMERegexp::operator[](int index)
{
   // Returns the sub-string from the internal fMarkers vector.
   // Requires having run match or split first.
   // An index outside [0, fNMatches-1] yields an empty string.

   if (index < 0 || index >= fNMatches)
      return "";

   Int_t begin = fMarkers[2*index];
   Int_t end   = fMarkers[2*index + 1];
   return fLastStringMatched(begin, end-begin);
}

//______________________________________________________________________________
void TPMERegexp::Print(Option_t* option)
{
   // Print the regular expression and its modifier options.
   // When 'option' contains "all" (case insensitive), the last string
   // matched, the number of matches and every match are printed too.

   TString opt = option;
   opt.ToLower();

   Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());

   if (!opt.Contains("all"))
      return;

   Printf("  last string='%s'", fLastStringMatched.Data());
   Printf("  number of matches = %d", fNMatches);
   for (Int_t idx = 0; idx < fNMatches; ++idx) {
      const TString sub = (*this)[idx];
      Printf("  %d - %s", idx, sub.Data());
   }
}


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// TStringToken                                                         //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

//______________________________________________________________________________
//
// Provides iteration through tokens of a given string:
//
// - fFullStr     stores the string to be split. It is never modified.
// - fSplitRe     is the perl-re that is used to separate the tokens.
// - fReturnVoid  if true, empty strings will be returned.
//
// Current token is stored in the TString base-class.
// During construction no match is done, use NextToken() to get the first
// and all subsequent tokens.
//

ClassImp(TStringToken)

//______________________________________________________________________________
TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid) :
   fFullStr    (fullStr),
   fSplitRe    (splitRe),
   fReturnVoid (retVoid),
   fPos        (0)
{
   // Constructor.
   //  fullStr - string to be split into tokens
   //  splitRe - regular expression that separates the tokens
   //  retVoid - if true, empty tokens are returned as well
   // No matching is done here; the position starts at 0 and the first
   // token is obtained with NextToken().
}

//______________________________________________________________________________
Bool_t TStringToken::NextToken()
{
   // Advance to the next token and store it in this TString.
   // Returns true if a new token is available, false otherwise.
   // Empty tokens are skipped unless fReturnVoid is set.

   const Ssiz_t fullLen = fFullStr.Length();
   TArrayI x;

   while (fPos < fullLen) {
      const Bool_t found = fSplitRe.Match(fFullStr, "", fPos, 2, &x) != 0;
      if (found) {
         // token is the text in front of the separator; go on after it
         TString::operator=(fFullStr(fPos, x[0] - fPos));
         fPos = x[1];
      } else {
         // no separator left: the rest of the string is the last token
         TString::operator=(fFullStr(fPos, fullLen - fPos));
         fPos = fullLen + 1;
      }
      if (fReturnVoid || Length())
         return kTRUE;
   }

   // Special case: void-strings are requested and the full-string
   // ends with the separator. Thus we return another empty string.
   if (fReturnVoid && fPos == fullLen) {
      TString::operator=("");
      fPos = fullLen + 1;
      return kTRUE;
   }

   return kFALSE;
}

Last change: Tue Dec 9 09:01:12 2008
Last generated: 2008-12-09 09:01

This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.