#include "Riostream.h"
#include "TPRegexp.h"
#include "TObjArray.h"
#include "TObjString.h"
#include "TError.h"
#include <pcre.h>
#include <vector>
struct PCREPriv_t {
   pcre       *fPCRE;
   pcre_extra *fPCREExtra;
   PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
};
// ClassImp is a macro provided by the ROOT headers (its definition is not
// visible in this file); presumably it registers the TPRegexp implementation
// with ROOT's class system.
ClassImp(TPRegexp)
TPRegexp::TPRegexp()
   : fPriv(new PCREPriv_t),
     fPCREOpts(0)
{
   // Default constructor: the pattern is left empty, no option bits are set
   // and nothing is compiled yet.
}
TPRegexp::TPRegexp(const TString &pat)
   : fPattern(pat),
     fPriv(new PCREPriv_t),
     fPCREOpts(0)
{
   // Create a regular expression for the pattern 'pat'. The pattern is only
   // stored here; this constructor does not compile it.
}
TPRegexp::TPRegexp(const TPRegexp &p)
   : fPattern(p.fPattern),
     fPriv(new PCREPriv_t),
     fPCREOpts(p.fPCREOpts)
{
   // Copy constructor. Only the pattern string and the option bits are
   // copied; the new object gets its own, still empty, PCRE holder, so the
   // compiled form of 'p' is not shared.
}
TPRegexp::~TPRegexp()
{
   // Destructor: release the compiled pattern and the study data (when
   // present) and then the private holder itself.

   PCREPriv_t *priv = fPriv;

   if (priv->fPCRE) {
      pcre_free(priv->fPCRE);
   }
   if (priv->fPCREExtra) {
      pcre_free(priv->fPCREExtra);
   }

   delete priv;
}
TPRegexp &TPRegexp::operator=(const TPRegexp &p)
{
   // Assignment operator. Takes over the pattern and the option bits of 'p'
   // and throws away whatever was compiled or studied for the old pattern;
   // nothing is recompiled here. Self-assignment is a no-op.

   if (this == &p)
      return *this;

   fPattern  = p.fPattern;
   fPCREOpts = p.fPCREOpts;

   if (fPriv->fPCRE) {
      pcre_free(fPriv->fPCRE);
      fPriv->fPCRE = 0;
   }
   if (fPriv->fPCREExtra) {
      pcre_free(fPriv->fPCREExtra);
      fPriv->fPCREExtra = 0;
   }

   return *this;
}
UInt_t TPRegexp::ParseMods(const TString &modStr) const
{
   // Translate a string of modifier letters into option bits.
   //   g - kPCRE_GLOBAL       i - PCRE_CASELESS     m - PCRE_MULTILINE
   //   o - kPCRE_OPTIMIZE     s - PCRE_DOTALL       x - PCRE_EXTENDED
   //   d - kPCRE_DEBUG_MSGS
   // An empty string returns the current fPCREOpts unchanged. Any other
   // letter is reported through Error() and resets the bits collected so
   // far to 0; parsing then continues with the next letter.

   if (modStr.Length() <= 0)
      return fPCREOpts;

   UInt_t flags = 0;
   for (const char *c = modStr; *c; ++c) {
      if (*c == 'g') {
         flags |= kPCRE_GLOBAL;
      } else if (*c == 'i') {
         flags |= PCRE_CASELESS;
      } else if (*c == 'm') {
         flags |= PCRE_MULTILINE;
      } else if (*c == 'o') {
         flags |= kPCRE_OPTIMIZE;
      } else if (*c == 's') {
         flags |= PCRE_DOTALL;
      } else if (*c == 'x') {
         flags |= PCRE_EXTENDED;
      } else if (*c == 'd') {
         flags |= kPCRE_DEBUG_MSGS;
      } else {
         Error("ParseMods", "illegal pattern modifier: %c", *c);
         flags = 0;
      }
   }
   return flags;
}
TString TPRegexp::GetModifiers() const
{
   // Return the modifier letters that correspond to the bits currently set
   // in fPCREOpts, always in the order "gimsxod".

   const UInt_t bits[] = {
      kPCRE_GLOBAL, PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL,
      PCRE_EXTENDED, kPCRE_OPTIMIZE, kPCRE_DEBUG_MSGS
   };
   const char letters[] = "gimsxod";

   TString mods;
   for (Int_t i = 0; letters[i]; ++i) {
      if (fPCREOpts & bits[i])
         mods += letters[i];
   }
   return mods;
}
void TPRegexp::Compile()
{
   // (Re)compile fPattern using the PCRE option bits in fPCREOpts (masked
   // with kPCRE_INTMASK). A previously compiled pattern is released first.
   // On failure an error is reported, fPriv->fPCRE stays null and any study
   // data is dropped. On success the pattern is also studied when study data
   // existed before or when kPCRE_OPTIMIZE is set.

   if (fPriv->fPCRE) {
      pcre_free(fPriv->fPCRE);
      fPriv->fPCRE = 0;   // never keep a dangling pointer around
   }

   if (fPCREOpts & kPCRE_DEBUG_MSGS)
      Info("Compile", "PREGEX compiling %s", fPattern.Data());

   const char *errstr = 0;
   Int_t patIndex = 0;
   fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
                               &errstr, &patIndex, 0);

   if (!fPriv->fPCRE) {
      Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
            fPattern.Data(), patIndex, errstr);
      // Study data belonging to the previous pattern is meaningless now.
      // Drop it here instead of asking pcre_study() to study a null pattern,
      // which would only produce a second, misleading error message.
      if (fPriv->fPCREExtra) {
         pcre_free(fPriv->fPCREExtra);
         fPriv->fPCREExtra = 0;
      }
      return;
   }

   if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
      Optimize();
}
void TPRegexp::Optimize()
{
   // Study the compiled pattern with pcre_study() and keep the resulting
   // study data in fPriv->fPCREExtra. Previously stored study data is
   // released first. A failure is reported through Error().

   if (fPriv->fPCREExtra) {
      pcre_free(fPriv->fPCREExtra);
      fPriv->fPCREExtra = 0;
   }

   if (fPCREOpts & kPCRE_DEBUG_MSGS)
      Info("Optimize", "PREGEX studying %s", fPattern.Data());

   const char *errstr = 0;
   // pcre_study() has its own, much smaller option set: the compile-time
   // bits held in fPCREOpts (PCRE_CASELESS, PCRE_MULTILINE, ...) are not
   // valid here. Depending on the PCRE version they are either rejected as
   // unknown option bits or misread as PCRE_STUDY_* flags, so pass 0.
   fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);

   if (!fPriv->fPCREExtra && errstr) {
      Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
            fPattern.Data(), errstr);
   }
}
Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
                            const TString &replacePattern,
                            Int_t *offVec, Int_t nrMatch) const
{
   // Append replacePattern to 'final', expanding every "$<number>" into the
   // corresponding matched sub-string of 's'.
   //   s              - string the match was performed on
   //   final          - output string, appended to
   //   replacePattern - replacement text containing the $-references
   //   offVec         - offset pairs as filled in by pcre_exec()
   //   nrMatch        - number of valid offset pairs in offVec
   // Returns the number of $-references that were expanded. A reference to a
   // group number outside [0, nrMatch-1] is reported through Error() and
   // expands to nothing.

   Int_t nrSubs = 0;
   const char *p = replacePattern;

   // Small state machine: state 0 copies plain text, state 1 collects the
   // digits of a group number, state -1 terminates the loop.
   Int_t state = 0;
   Int_t subnum = 0;
   while (state != -1) {
      switch (state) {
         case 0:
            if (!*p) {
               state = -1;
               break;
            }
            if (*p == '$') {
               state = 1;
               subnum = 0;
               // The <cctype> functions require a value representable as
               // unsigned char, hence the casts.
               if (p[1] == '&') {
                  p++;
                  if (isdigit(static_cast<unsigned char>(p[1])))
                     p++;
               } else if (!isdigit(static_cast<unsigned char>(p[1]))) {
                  Error("ReplaceSubs", "badly formed replacement pattern: %s",
                        replacePattern.Data());
               }
            } else
               final += *p;
            break;
         case 1:
            if (isdigit(static_cast<unsigned char>(*p))) {
               subnum *= 10;
               subnum += (*p)-'0';
            } else {
               if (fPCREOpts & kPCRE_DEBUG_MSGS)
                  Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
               if (subnum < 0 || subnum > nrMatch-1) {
                  // Do not touch offVec for a group that does not exist.
                  Error("ReplaceSubs","bad string number :%d",subnum);
               } else {
                  const Int_t from = offVec[2*subnum];
                  const Int_t to   = offVec[2*subnum+1];
                  // Negative offsets are treated as "nothing to append",
                  // the same check MatchS() applies to its offsets.
                  if (from >= 0 && to >= 0) {
                     const TString subStr = s(from, to-from);
                     final += subStr;
                  }
                  nrSubs++;
               }
               state = 0;
               continue;  // re-examine the current character in state 0
            }
      }
      p++;
   }
   return nrSubs;
}
Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
                              Int_t nMaxMatch, TArrayI *pos)
{
   // Run pcre_exec() on 's', beginning at offset 'start', with an offset
   // vector of 3*nMaxMatch entries. Returns the number of matched
   // (sub)strings; 0 when nothing matched or when pcre_exec() failed (the
   // failure is reported through Error()). When 'pos' is given it is Set()
   // from the first 2*<return value> offsets, except on failure, where it
   // is left untouched.

   const Int_t vecSize = 3*nMaxMatch;
   Int_t *offsets = new Int_t[vecSize];

   Int_t nFound = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                            s.Length(), start, fPCREOpts & kPCRE_INTMASK,
                            offsets, vecSize);

   if (nFound == PCRE_ERROR_NOMATCH) {
      nFound = 0;
   } else if (nFound <= 0) {
      Error("Match","pcre_exec error = %d", nFound);
      delete [] offsets;
      return 0;
   }

   if (pos)
      pos->Set(2*nFound, offsets);

   delete [] offsets;
   return nFound;
}
Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
                      Int_t nMaxMatch, TArrayI *pos)
{
   // Match the pattern against 's', starting at offset 'start'.
   //   mods      - modifier letters, see ParseMods()
   //   nMaxMatch - sizes the offset vector used by MatchInternal()
   //   pos       - optional; receives the match offsets
   // The pattern is (re)compiled when it has not been compiled yet or when
   // the parsed modifiers differ from the current options.
   // Returns the value of MatchInternal().

   const UInt_t wanted = ParseMods(mods);
   const Bool_t mustCompile = (fPriv->fPCRE == 0) || (wanted != fPCREOpts);

   if (mustCompile) {
      fPCREOpts = wanted;
      Compile();
   }

   return MatchInternal(s, start, nMaxMatch, pos);
}
TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
                            Int_t start, Int_t nMaxMatch)
{
   // Like Match(), but return the matched (sub)strings themselves.
   // The returned TObjArray is allocated with new and owns its elements
   // (SetOwner() is called); it holds one TObjString per match reported by
   // Match(). An entry whose start or end offset is negative becomes an
   // empty TObjString.

   TArrayI pos;
   const Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);

   TObjArray *result = new TObjArray();
   result->SetOwner();

   for (Int_t i = 0; i < nrMatch; ++i) {
      const Int_t from = pos[2*i];
      const Int_t to   = pos[2*i+1];

      if (from < 0 || to < 0) {
         result->Add(new TObjString());
         continue;
      }

      const TString piece = s(from, to - from);
      result->Add(new TObjString(piece));
   }

   return result;
}
Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
                                   Int_t start, Int_t nMaxMatch,
                                   Bool_t doDollarSubst)
{
   // Replace matches of the already compiled pattern in 's' (searching from
   // offset 'start') by replacePattern and store the result back into 's'.
   // With doDollarSubst the replacement goes through ReplaceSubs(),
   // otherwise it is appended literally. Only the first match is replaced
   // unless kPCRE_GLOBAL is set. Returns the accumulated substitution count:
   // the ReplaceSubs() return values, or 1 per literal replacement.

   const Int_t vecSize = 3*nMaxMatch;
   Int_t *ov = new Int_t[vecSize];

   TString result;
   Int_t count      = 0;
   Int_t searchFrom = start;
   Int_t copied     = 0;   // everything of 's' before this offset is handled

   for (;;) {

      // look for the next match
      const Int_t rc = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                                 s.Length(), searchFrom,
                                 fPCREOpts & kPCRE_INTMASK, ov, vecSize);

      if (rc == PCRE_ERROR_NOMATCH)
         break;
      if (rc <= 0) {
         Error("Substitute", "pcre_exec error = %d", rc);
         break;
      }

      // copy the untouched text in front of the match
      if (copied <= ov[0]) {
         result += s(copied, ov[0]-copied);
         copied = ov[1];
      }

      // append the replacement
      if (doDollarSubst) {
         count += ReplaceSubs(s, result, replacePattern, ov, rc);
      } else {
         result += replacePattern;
         ++count;
      }

      // a single replacement is enough unless matching globally
      if (!(fPCREOpts & kPCRE_GLOBAL))
         break;

      // an empty match must advance by one character to make progress
      const Bool_t emptyMatch = (ov[0] == ov[1]);
      if (emptyMatch && ov[1] == s.Length())
         break;
      searchFrom = emptyMatch ? ov[1]+1 : ov[1];
   }

   delete [] ov;

   // copy the remainder of the input
   result += s(copied, s.Length()-copied);
   s = result;

   return count;
}
Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
                           const TString &mods, Int_t start, Int_t nMaxMatch)
{
   // Substitute matches of the pattern in 's' by replacePattern, with
   // "$<number>" references expanded (see ReplaceSubs()).
   //   mods      - modifier letters, see ParseMods()
   //   start     - offset in 's' at which the search begins
   //   nMaxMatch - sizes the offset vector used while matching
   // The pattern is (re)compiled when it has not been compiled yet or when
   // the parsed modifiers differ from the current options.
   // Returns the value of SubstituteInternal().

   const UInt_t wanted = ParseMods(mods);
   const Bool_t mustCompile = (fPriv->fPCRE == 0) || (wanted != fPCREOpts);

   if (mustCompile) {
      fPCREOpts = wanted;
      Compile();
   }

   return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
}
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
{
   // Return the offset of the first match of 'r' in this string, searching
   // from 'start', or -1 when there is no match.

   TArrayI pos;
   const Int_t nrMatch = r.Match(*this, "", start, 10, &pos);
   return (nrMatch > 0) ? pos[0] : -1;
}
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
{
   // Return the offset of the first match of 'r' in this string, searching
   // from 'start', and store the length of the match in *extent.
   // Without a match -1 is returned and *extent is set to 0.

   TArrayI pos;
   if (r.Match(*this, "", start, 10, &pos) <= 0) {
      *extent = 0;
      return -1;
   }

   *extent = pos[1]-pos[0];
   return pos[0];
}
TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
{
   // Return the sub-string matched by 'r', searching from offset 'start'.
   // The TSubString is built from the offset and length reported by Index().

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}
TSubString TString::operator()(TPRegexp& r)
{
   // Return the sub-string matched by 'r', searching from the beginning of
   // the string.

   return operator()(r, 0);
}
TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
{
   // Const version: return the sub-string matched by 'r', searching from
   // offset 'start'. The TSubString is built from the offset and length
   // reported by Index().

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}
TSubString TString::operator()(TPRegexp& r) const
{
   // Const version: return the sub-string matched by 'r', searching from the
   // beginning of the string.

   return operator()(r, 0);
}
TPMERegexp::TPMERegexp()
   : TPRegexp(),
     fNMaxMatches(10), fNMatches(0),
     fAddressOfLastString(0), fLastGlobalPosition(0)
{
   // Default constructor: room for 10 matches, no match state, and the
   // pattern left by the TPRegexp default constructor is compiled at once.

   Compile();
}
TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax), fNMatches(0),
     fAddressOfLastString(0), fLastGlobalPosition(0)
{
   // Construct from a pattern and a string of modifier letters.
   //   s         - pattern
   //   opts      - modifier letters, translated by ParseMods()
   //   nMatchMax - stored in fNMaxMatches, the match limit handed to
   //               MatchInternal()
   // The pattern is compiled immediately.

   fPCREOpts = ParseMods(opts);
   Compile();
}
TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax), fNMatches(0),
     fAddressOfLastString(0), fLastGlobalPosition(0)
{
   // Construct from a pattern and option bits.
   //   s         - pattern
   //   opts      - option bits, stored in fPCREOpts as given
   //   nMatchMax - stored in fNMaxMatches, the match limit handed to
   //               MatchInternal()
   // The pattern is compiled immediately.

   fPCREOpts = opts;
   Compile();
}
TPMERegexp::TPMERegexp(const TPMERegexp& r)
   : TPRegexp(r),
     fNMaxMatches(r.fNMaxMatches), fNMatches(0),
     fAddressOfLastString(0), fLastGlobalPosition(0)
{
   // Copy constructor. The pattern and options are copied by the TPRegexp
   // copy constructor and the match limit is copied from 'r'. The match
   // state of 'r' is not copied: the new object starts without matches.
   // The pattern is compiled immediately.

   Compile();
}
void TPMERegexp::ResetGlobalState()
{
   // Reset the position remembered for global matching: Match() adds
   // fLastGlobalPosition to its offset when kPCRE_GLOBAL is set, so after
   // this call the next such match starts from the given offset again.
   
   fLastGlobalPosition = 0;
}
Int_t TPMERegexp::Match(const TString& s, UInt_t offset)
{
   // Match the compiled pattern against 's', starting at 'offset'.
   // The match offsets are stored in fMarkers, a copy of 's' in
   // fLastStringMatched, and the number of matches in fNMatches, which is
   // also returned (0 when nothing matched or matching failed).
   //
   // With kPCRE_GLOBAL set, the end of the previous match is added to
   // 'offset', so repeated calls with the same string object walk through
   // its matches. The remembered position is reset when a different string
   // object (compared by address) is passed, and after an unsuccessful
   // match.

   if (fAddressOfLastString != (void*) &s) {
      fLastGlobalPosition = 0;
   }

   const Bool_t global = (fPCREOpts & kPCRE_GLOBAL) != 0;
   if (global) {
      offset += fLastGlobalPosition;
   }

   fNMatches = MatchInternal(s, offset, fNMaxMatches, &fMarkers);

   fLastStringMatched   = s;
   fAddressOfLastString = (void*) &s;

   if (global) {
      // MatchInternal() never returns a negative value (no-match and errors
      // both come back as 0), so testing for PCRE_ERROR_NOMATCH here was
      // dead code: the only distinction that matters is match / no match.
      fLastGlobalPosition = (fNMatches > 0) ? fMarkers[1] : 0;
   }

   return fNMatches;
}
Int_t TPMERegexp::Split(const TString& s, Int_t maxfields)
{
   // Split 's' into fields, using the matches of the pattern as separators.
   //
   // The resulting fields are stored as (begin, end) offset pairs in
   // fMarkers and their number in fNMatches, which is also returned.
   // Match() is called on 's' at least once, which also stores a copy of
   // 's' in fLastStringMatched.
   //
   // maxfields controls how many separators are consumed and what happens
   // to empty fields:
   //   maxfields == 0 : no limit; empty fields are held back and only
   //                    emitted if a later field turns out to be non-empty
   //                    (an empty final field is dropped as well).
   //   maxfields  > 0 : at most maxfields separator matches are processed;
   //                    if that limit is reached the last marker is extended
   //                    to the end of the string. Empty fields are kept.
   //   maxfields  < 0 : no limit; empty fields, including the final one,
   //                    are kept.
   //
   // Sub-matches (capture groups) of a separator match are added as fields
   // of their own, right after the field preceding the separator.
   // If the pattern never matches, the whole string is the single field.
   
   typedef std::pair<int, int>   Marker_t;
   typedef std::vector<Marker_t> MarkerVec_t;
   
   // (begin, end) offsets of the fields found so far
   MarkerVec_t oMarks;
   
   // Empty fields seen while maxfields == 0. They are moved into oMarks
   // only once a following non-empty field is found; otherwise they are
   // trailing empties and never emitted.
   
   
   MarkerVec_t oCurrentTrailingEmpties;
   Int_t nOffset = 0;
   Int_t nMatchesFound = 0;
   
   // Keep matching while a separator is found and the maxfields limit
   // (only active when maxfields >= 1) has not been reached.
   
   Int_t matchRes;
   while ((matchRes = Match(s, nOffset)) &&
          ((maxfields < 1) || nMatchesFound < maxfields)) {
      ++nMatchesFound;
      // Zero-length separator match: emit a one-character field at the
      // current offset and advance by one, stopping at the end of 's'.
      if (fMarkers[1] - fMarkers[0] == 0) {
         oMarks.push_back(Marker_t(nOffset, nOffset + 1));
         ++nOffset;
         if (nOffset >= s.Length())
            break;
         else
            continue;
      }
      
      if (nOffset != fMarkers[0]) {
         // Non-empty field in front of the separator: the empties held back
         // so far were not trailing after all, so emit them first.
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
            oCurrentTrailingEmpties.clear();
         }
         oMarks.push_back(Marker_t(nOffset, fMarkers[0]));
      } else {
         // Separator starts right at the current offset: empty field.
         if (maxfields == 0) {
            // hold it back, it may turn out to be a trailing empty
            oCurrentTrailingEmpties.push_back(Marker_t(nOffset, nOffset));
         } else {
            oMarks.push_back(Marker_t(nOffset, nOffset));
         }
      }
      // Continue after the separator; matchRes > 1 means the separator
      // match has sub-matches, which become fields too.
      nOffset = fMarkers[1];
      if (matchRes > 1) {
         for (Int_t i = 1; i < matchRes; ++i)
            oMarks.push_back(Marker_t(fMarkers[2*i], fMarkers[2*i + 1]));
      }
   }
   
   // No separator found at all: the whole string is the only field.
   if (nMatchesFound == 0) {
      oMarks.push_back(Marker_t(0, s.Length()));
   }
   
   // Limit reached: let the last marker run to the end of the string.
   else if (maxfields > 0 && nMatchesFound >= maxfields) {
      oMarks[oMarks.size() - 1].second = s.Length();
   }
   
   // Otherwise add the remainder after the last separator, unless it is
   // empty and empties are not wanted (maxfields >= 0).
   else {
      Bool_t last_empty = (nOffset == s.Length());
      if (!last_empty || maxfields < 0) {
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
         }
         oMarks.push_back(Marker_t(nOffset, s.Length()));
      }
   }
   // Publish the fields through fNMatches / fMarkers.
   fNMatches = oMarks.size();
   fMarkers.Set(2*fNMatches);
   for (Int_t i = 0; i < fNMatches; ++i) {
      fMarkers[2*i]     = oMarks[i].first;
      fMarkers[2*i + 1] = oMarks[i].second;
   }
   
   return fNMatches;
}
TString TPMERegexp::Substitute(const TString& s, const TString& r, Bool_t doDollarSubst)
{
   // Substitute matches of the pattern in a copy of 's' by 'r' (through
   // SubstituteInternal(), starting at offset 0) and return the result
   // after processing the case-conversion escapes it contains:
   //   \l  lower-case the next character     \u  upper-case the next character
   //   \L  lower-case until \E               \U  upper-case until \E
   //   \E  end a \L or \U region
   // A backslash followed by any other character is copied together with
   // that character; a backslash at the very end is copied as is.
   // 's' itself is not modified. With doDollarSubst, "$<number>" references
   // in 'r' are expanded (see ReplaceSubs()).

   TString newstring(s);
   SubstituteInternal(newstring, r, 0, fNMaxMatches, doDollarSubst);

   TString ret;
   Int_t   state = 0;   // 0: copy, 1: \l, 2: \u, 3: \L, 4: \U
   Ssiz_t  pos = 0, len = newstring.Length();
   const Char_t *data = newstring.Data();
   while (pos < len) {
      Char_t c = data[pos];
      if (c == '\\') {
         c = data[pos+1]; // at the end of the string this reads the terminating 0
         switch (c) {
            case  0 : ret += '\\'; break;
            case 'l': state = 1;   break;
            case 'u': state = 2;   break;
            case 'L': state = 3;   break;
            case 'U': state = 4;   break;
            case 'E': state = 0;   break;
            default : ret += '\\'; ret += c; break;
         }
         pos += 2;
      } else {
         // tolower()/toupper() are only defined for values representable as
         // unsigned char (or EOF); a plain Char_t may be negative for
         // non-ASCII bytes, so convert before the call.
         const unsigned char uc = static_cast<unsigned char>(c);
         switch (state) {
            case 0:  ret += c; break;
            case 1:  ret += static_cast<Char_t>(tolower(uc)); state = 0; break;
            case 2:  ret += static_cast<Char_t>(toupper(uc)); state = 0; break;
            case 3:  ret += static_cast<Char_t>(tolower(uc)); break;
            case 4:  ret += static_cast<Char_t>(toupper(uc)); break;
            default: Error("TPMERegexp::Substitute", "invalid state.");
         }
         ++pos;
      }
   }

   return ret;
}
TString TPMERegexp::operator[](int index)
{
   // Return the index-th (sub)string of the last Match() or Split() result,
   // cut out of fLastStringMatched using the offsets in fMarkers.
   // An index outside [0, fNMatches-1] yields an empty string.

   // Reject negative indices as well: they would address fMarkers out of
   // range.
   if (index < 0 || index >= fNMatches)
      return "";

   Int_t begin = fMarkers[2*index];
   Int_t end   = fMarkers[2*index + 1];
   return fLastStringMatched(begin, end-begin);
}
void TPMERegexp::Print(Option_t* option)
{
   // Print the pattern and its modifier letters. When 'option' contains
   // "all" (case-insensitive), the last string matched, the number of
   // matches and every matched (sub)string are printed as well.

   TString opt(option);
   opt.ToLower();

   Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());

   if (!opt.Contains("all"))
      return;

   Printf("  last string='%s'", fLastStringMatched.Data());
   Printf("  number of matches = %d", fNMatches);
   for (Int_t i = 0; i < fNMatches; ++i) {
      const TString sub = operator[](i);
      Printf("  %d - %s", i, sub.Data());
   }
}
// ClassImp is a macro provided by the ROOT headers (its definition is not
// visible in this file); presumably it registers the TStringToken
// implementation with ROOT's class system.
ClassImp(TStringToken)
TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid)
   : fFullStr(fullStr),
     fSplitRe(splitRe),
     fReturnVoid(retVoid),
     fPos(0)
{
   // Prepare tokenisation of 'fullStr'.
   //   fullStr - string to be split into tokens
   //   splitRe - regular expression matching the separators
   //   retVoid - when true, NextToken() also delivers empty tokens
   // Scanning starts at offset 0; call NextToken() to obtain the tokens.
}
Bool_t TStringToken::NextToken()
{
   // Advance to the next token and assign it to this object (the TString
   // part of the TStringToken). Returns kTRUE when a token was produced and
   // kFALSE when the input is exhausted. Empty tokens are skipped unless
   // fReturnVoid is set.

   const Ssiz_t fullLen = fFullStr.Length();
   TArrayI x;

   while (fPos < fullLen) {
      const Bool_t found = fSplitRe.Match(fFullStr, "", fPos, 2, &x) != 0;
      if (found) {
         // token is the text in front of the separator; go on behind it
         TString::operator=(fFullStr(fPos, x[0] - fPos));
         fPos = x[1];
      } else {
         // no separator left: the rest of the string is the last token
         TString::operator=(fFullStr(fPos, fullLen - fPos));
         fPos = fullLen + 1;
      }

      if (Length() || fReturnVoid)
         return kTRUE;
   }

   // The scan stopped exactly at the end of the string, i.e. right behind a
   // separator: deliver one final empty token if empty tokens are wanted.
   if (fPos == fullLen && fReturnVoid) {
      TString::operator=("");
      fPos = fullLen + 1;
      return kTRUE;
   }

   return kFALSE;
}
Last change: Tue May 13 17:25:49 2008
Last generated: 2008-05-13 17:25
This page has been automatically generated. If you have any comments or suggestions about the page layout, send an e-mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.