ROOT  6.06/09
Reference Guide
TRDataFrame.h
Go to the documentation of this file.
1 // @(#)root/r:$Id$
2 // Author: Omar Zapata 30/05/2015
3 
4 
5 /*************************************************************************
6  * Copyright (C) 2013-2015, Omar Andres Zapata Mesa *
7  * All rights reserved. *
8  * *
9  * For the licensing terms see $ROOTSYS/LICENSE. *
10  * For the list of contributors see $ROOTSYS/README/CREDITS. *
11  *************************************************************************/
12 #ifndef ROOT_R_TRDataFrame
13 #define ROOT_R_TRDataFrame
14 
15 #ifndef ROOT_R_RExports
16 #include<RExports.h>
17 #endif
18 
19 #ifndef ROOT_R_TRObject
20 #include<TRObject.h>
21 #endif
22 
23 #ifndef ROOT_R_TRFunctionImport
24 #include<TRFunctionImport.h>
25 #endif
26 
27 
28 namespace ROOT {
29  namespace R {
30 
31  /**
32  \class TRDataFrame
33 
34  This is a class to create DataFrames from ROOT to R
35  <center><h2>TRDataFrame class</h2></center>
36 
37  DataFrame is a very important datatype in R, and ROOTR provides a class to manipulate<br>
38  dataframes called TRDataFrame, with many useful overloaded operators for working with TRDataFrame objects<br>
39  in a way similar to the R environment, but from C++ in ROOT.<br>
40  Example:<br>
41  <br>
42  Let's create some data to play with the dataframe features<br>
43 
44  <h2>Creating variables</h2><br>
45  \code{.cpp}
46  TVectorD v1(3);
47  std::vector<Double_t> v2(3);
48  std::array<Int_t,3> v3{ {1,2,3} };
49  std::list<std::string> names;
50  \endcode
51 
52  <h2> Assigning values </h2><br>
53  \code{.cpp}
54  v1[0]=1;
55  v1[1]=2;
56  v1[2]=3;
57 
58  v2[0]=0.101;
59  v2[1]=0.202;
60  v2[2]=0.303;
61 
62  names.push_back("v1");
63  names.push_back("v2");
64  names.push_back("v3");
65 
66  ROOT::R::TRInterface &r=ROOT::R::TRInterface::Instance();
67  \endcode
68 
69  In R, every column of a dataframe has an associated label;
70  in ROOTR you can attach the same kind of label with the class ROOT::R::Label, creating a TRDataFrame whose data
71  carry an associated label.
72  <h2> Creating dataframe object with its labels</h2> <br>
73  \code{.cpp}
74  using namespace ROOT::R;
75  TRDataFrame df1(Label["var1"]=v1,Label["var2"]=v2,Label["var3"]=v3,Label["strings"]=names);
76  \endcode
77 
78  <h2>Passing dataframe to R's environment</h2><br>
79  \code{.cpp}
80  r["df1"]<<df1;
81  r<<"print(df1)";
82  \endcode
83  Output
84  \code
85  var1 var2 var3 strings
86  1 1 0.101 1 v1
87  2 2 0.202 2 v2
88  3 3 0.303 3 v3
89  \endcode
90 
91  Manipulating data between dataframes
92  <h2>Adding columns to dataframe</h2><br>
93  \code{.cpp}
94  TVectorD v4(3);
95  //filling the vector from R's environment
96  r["c(-1,-2,-3)"]>>v4;
97  //adding new column to df1 with name var4
98  df1["var4"]=v4;
99  //updating df1 in R's environment
100  r["df1"]<<df1;
101  //printing df1
102  r<<"print(df1)";
103  \endcode
104 
105  Output
106  var1 var2 var3 strings var4
107  1 1 0.101 1 v1 -1
108  2 2 0.202 2 v2 -2
109  3 3 0.303 3 v3 -3
110 
111  <h2>Getting dataframe from R's environment</h2><br>
112  \code{.cpp}
113  ROOT::R::TRDataFrame df2;
114 
115  r<<"df2<-data.frame(v1=c(0.1,0.2,0.3),v2=c(3,2,1))";
116  r["df2"]>>df2;
117 
118  TVectorD v(3);
119  df2["v1"]>>v;
120  v.Print();
121 
122  df2["v2"]>>v;
123  v.Print();
124  \endcode
125 
126  Output
127  \code
128  Vector (3) is as follows
129 
130  | 1 |
131  ------------------
132  0 |0.1
133  1 |0.2
134  2 |0.3
135 
136  Vector (3) is as follows
137 
138  | 1 |
139  ------------------
140  0 |3
141  1 |2
142  2 |1
143  \endcode
144 
145  <h2>Working with columns between dataframes</h2><br>
146  \code{.cpp}
147  df2["v3"]<<df1["strings"];
148 
149  //updating df2 in R's environment
150  r["df2"]<<df2;
151  r<<"print(df2)";
152  \endcode
153  Output
154  \code
155  v1 v2 v3
156  1 0.1 3 v1
157  2 0.2 2 v2
158  3 0.3 1 v3
159  \endcode
160 
161  <h2>Working with columns between dataframes</h2><br>
162  \code{.cpp}
163  //passing values from column v3 of df2 to var1 of df1
164  df2["v3"]>>df1["var1"];
165  //updating df1 in R's environment
166  r["df1"]<<df1;
167  r<<"print(df1)";
168  \endcode
169  Output
170  \code
171  var1 var2 var3 strings var4
172  1 v1 0.101 1 v1 -1
173  2 v2 0.202 2 v2 -2
174  3 v3 0.303 3 v3 -3
175  \endcode
176  <h2>Users Guide </h2>
177  <a href="http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide"> http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide</a><br>
178  <a href="https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface"> https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface</a>
179  @ingroup R
180  */
181 
182 
183  class TRDataFrame: public TObject {
184  friend class TRInterface;
185  friend SEXP Rcpp::wrap<TRDataFrame>(const TRDataFrame &f);
186  protected:
187  Rcpp::DataFrame df; //internal Rcpp::DataFrame
188  public:
189  //Proxy class that provides the assignment and stream operators, e.g. df["name"]>>object
190  class Binding {
191  friend class TRDataFrame;
192  public:
193  /**
194  Construct a Binding, the nested proxy class that provides the operators used on a dataframe column
195  \param _df Rcpp::DataFrame to operate on (internal to TRDataFrame); it is held by reference, not copied
196  \param name label of the column used in assignments
197  */
198  Binding(Rcpp::DataFrame &_df, TString name): fName(name), fDf(_df) {}
199  /**
200  Copy constructor for the nested Binding class
201  \param obj Binding to copy; the copy refers to the same Rcpp::DataFrame object and carries the same label
202  */
203  Binding(const Binding &obj): fName(obj.fName), fDf(obj.fDf) {}
204  /**
205  Template assignment operator: stores var in the column labelled fName, assigning to that column if it exists and appending a new column otherwise
206  \param var any R wrappable datatype
207  */
208  template <class T> Binding operator=(T var)
209  {
210  int size = fDf.size(), i = 0 ;
211  Rcpp::CharacterVector names = fDf.attr("names");
212  bool found = false;
213  while (i < size) { // linear search for an existing column with this label
214  if (names[i] == fName.Data()) {
215  found = true;
216  break;
217  }
218  i++;
219  }
220  if (found) fDf[fName.Data()] = var;
221  else {
222  if (size == 0) { // no columns yet: create a new frame holding only this column
223  fDf = Rcpp::DataFrame::create(ROOT::R::Label[fName.Data()] = var);
224  } else { // otherwise rebuild the frame as a list with one extra slot
225  Rcpp::List nDf(size + 1);
226  Rcpp::CharacterVector nnames(size + 1);
227  for (i = 0; i < size; i++) {
228  nDf[i] = fDf[i] ;
229  nnames[i] = names[i];
230  }
231  nDf[size] = var;
232  nnames[size] = fName.Data();
233  nDf.attr("class") = fDf.attr("class") ; // carry over the attributes of the existing frame
234  nDf.attr("row.names") = fDf.attr("row.names") ;
235  nDf.attr("names") = nnames ;
236  fDf = nDf;
237  }
238  }
239  return *this; // returned by value: the caller receives a copy of this proxy
240  }
241  /**
242  Assignment from another Binding: copies the column obj refers to into the column labelled fName of this dataframe, assigning to it if it exists and appending it otherwise
243  \param obj other Binding object, naming the source dataframe and the source column
244  */
245  Binding operator=(Binding obj)
246  {
247  int size = fDf.size(), i = 0 ;
248  Rcpp::CharacterVector names = fDf.attr("names");
249  bool found = false;
250  while (i < size) { // linear search for an existing column with this label
251  if (names[i] == fName.Data()) {
252  found = true;
253  break;
254  }
255  i++;
256  }
257  if (found) fDf[fName.Data()] = obj.fDf[obj.fName.Data()];
258  else {
259  Rcpp::List nDf(size + 1);
260  Rcpp::CharacterVector nnames(size + 1);
261  for (i = 0; i < size; i++) {
262  nDf[i] = fDf[i] ; // keep this frame's own columns: size and names describe fDf, not obj.fDf
263  nnames[i] = names[i];
264  }
265  nDf[size] = obj.fDf[obj.fName.Data()];
266  nnames[size] = fName.Data();
267  Rcpp::DataFrame &attrSrc = (size == 0) ? obj.fDf : fDf; // a frame without columns has no row names to inherit, so fall back to the source frame
268  nDf.attr("class") = attrSrc.attr("class") ;
269  nDf.attr("row.names") = attrSrc.attr("row.names") ;
270  nDf.attr("names") = nnames ;
271  fDf = nDf;
272  }
273 
274  return *this; // returned by value: the caller receives a copy of this proxy
275  }
276 
277  /**
278  Template method for operator >> that lets dataframes be used like streams
279  example: df["v"]>>vector;
280  \param var object that receives the column labelled fName, converted with Rcpp::as<T>
281  */
282  template <class T> Binding &operator >>(T &var)
283  {
284  var = Rcpp::as<T>(fDf[fName.Data()]);
285  return *this;
286  }
287  Binding operator >>(Binding var) // copies this column into the column var refers to, e.g. df2["v3"]>>df1["var1"]; NOTE(review): declaration restored from the body and that usage - confirm
288  {
289  var.fDf[var.fName.Data()] = fDf[fName.Data()];
290  return var;
291  }
292 
293  /**
294  Template method for operator << that lets dataframes be used like streams
295  example: df["v"]<<vector;
296  \param var any datatype that can be assigned to the dataframe column labelled fName
297  */
298  template <class T> Binding &operator <<(T var)
299  {
300  int size = fDf.size(), i = 0 ;
301  Rcpp::CharacterVector names = fDf.attr("names");
302  bool found = false;
303  while (i < size) { // linear search for an existing column with this label
304  if (names[i] == fName.Data()) {
305  found = true;
306  break;
307  }
308  i++;
309  }
310  if (found) fDf[fName.Data()] = var;
311  else { // NOTE(review): unlike operator=(T), a frame without columns (size == 0) is not special-cased here - confirm this is intended
312  Rcpp::List nDf(size + 1);
313  Rcpp::CharacterVector nnames(size + 1);
314  for (i = 0; i < size; i++) {
315  nDf[i] = fDf[i] ;
316  nnames[i] = names[i];
317  }
318  nDf[size] = var;
319  nnames[size] = fName.Data();
320 
321  nDf.attr("class") = fDf.attr("class") ; // carry over the attributes of the existing frame
322  nDf.attr("row.names") = fDf.attr("row.names") ;
323  nDf.attr("names") = nnames ;
324  fDf = nDf;
325  }
326  return *this;
327  }
328  template <class T> operator T() // implicit conversion: the column labelled fName, converted with Rcpp::as<T>
329  {
330  return Rcpp::as<T>(fDf[fName.Data()]);
331  }
332  template <class T> operator T() const // const overload of the same conversion
333  {
334  return Rcpp::as<T>(fDf[fName.Data()]);
335  }
336 
337  private:
338  TString fName; //name of label
339  Rcpp::DataFrame &fDf;//internal dataframe
340  };
341 
342  /**
343  Default TRDataFrame constructor
344  */
345  TRDataFrame();
346  /**
347  TRDataFrame constructor from a raw R object
348  \param obj raw R object that can be cast to a DataFrame; it is converted with Rcpp::as<Rcpp::DataFrame>
349  */
350  TRDataFrame(SEXP obj)
351  {
352  df = Rcpp::as<Rcpp::DataFrame>(obj);
353  }
354  /**
355  TRDataFrame copy constructor
356  \param _df other TRDataFrame
357  */
358  TRDataFrame(const TRDataFrame &_df);
359  /**
360  TRDataFrame constructor for Rcpp::DataFrame
361  \param _df raw dataframe from Rcpp, used to initialise the internal dataframe
362  */
363  TRDataFrame(const Rcpp::DataFrame &_df): df(_df) {};
364 
365 #include <TRDataFrame__ctors.h>
366 
367  Binding operator[](const TString &name); // defined out of line; the returned Binding is what expressions such as df["name"]>>object operate on
368 
369  TRDataFrame &operator=(TRDataFrame &obj) // assigns the internal Rcpp::DataFrame of obj to this object
370  {
371  df = obj.df;
372  return *this;
373  }
374 
375  TRDataFrame &operator=(TRDataFrame obj) // by-value overload: assigns the internal Rcpp::DataFrame of obj to this object
376  {
377  df = obj.df;
378  return *this;
379  }
380 
381  TRDataFrame &operator=(SEXP obj) // assigns from a raw R object, converted with Rcpp::as<Rcpp::DataFrame>
382  {
383  df = Rcpp::as<Rcpp::DataFrame>(obj);
384  return *this;
385  }
386 
387  operator SEXP() // implicit conversion that hands out the internal Rcpp::DataFrame as a SEXP
388  {
389  return df;
390  }
391 
392  operator SEXP() const // const overload of the same conversion
393  {
394  return df;
395  }
396 
397  /**
398  Method to get the number of columns
399  \return number of columns, taken from the size of the internal Rcpp::DataFrame
400  */
401  int GetNcols()
402  {
403  return df.size();
404  }
405  /**
406  Method to get the number of rows
407  \return number of rows, as reported by nrows() of the internal Rcpp::DataFrame
408  */
409  int GetNrows()
410  {
411  return df.nrows();
412  }
413  /**
414  Method to get the labels of the dataframe
415  \return column names, read from the "names" attribute of the internal dataframe
416  */
417  TVectorString GetColNames()
418  {
419  Rcpp::CharacterVector names = df.attr("names");
420  TVectorString rnames(GetNcols());
421  for (int i = 0; i < GetNcols(); i++)rnames[i] = names[i];
422  return rnames;
423  }
424 
425  /**
426  Method to get the dataframe as a matrix
427  \note only works on numerical dataframes; if some column is a string or another non-numeric type it will fail
428  \return TMatrixT with the given template data type
429  */
430  template<class T> TMatrixT<T> AsMatrix()
431  {
432  TRFunctionImport asMatrix("as.matrix"); // the conversion is delegated to the R function as.matrix
433  return Rcpp::as<TMatrixT<T> >(asMatrix(df));
434  }
435 
436  /**
437  Method to print the dataframe in stdout or a column given the label
438  \param label name of the column to print; when empty (the default) the whole dataframe is printed
439  */
440  void Print(TString label = "")
441  {
442  TRFunctionImport print("print"); // printing is delegated to the R function print
443  if (label == "") print(df);
444  else print(df[label.Data()]);
445  }
446  ClassDef(TRDataFrame, 0) //
447  };
448  }
449 }
450 
451 
452 
453 #endif
TVectorString GetColNames()
Method to get labels of dataframe.
Definition: TRDataFrame.h:417
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
double T(double x)
Definition: ChebyshevPol.h:34
Binding & operator<<(T var)
Template method for operator << that lets dataframes be used like streams, example: df["v"]<<vector;
Definition: TRDataFrame.h:298
Binding operator=(Binding obj)
method for operator assignation of Binding class
Definition: TRDataFrame.h:245
TRDataFrame()
Default TDataFrame constructor.
void Print(TString label="")
Method to print the dataframe in stdout or a column given the label.
Definition: TRDataFrame.h:440
Basic string class.
Definition: TString.h:137
Binding operator[](const TString &name)
Definition: TRDataFrame.cxx:28
TMatrixT< T > AsMatrix()
Method to get dataframe as matrix.
Definition: TRDataFrame.h:430
void Binding()
Definition: Binding.C:21
Binding(Rcpp::DataFrame &_df, TString name)
Construct a Binding nestead class for facilities with operators.
Definition: TRDataFrame.h:198
TRDataFrame(SEXP obj)
TDataFrame constructor.
Definition: TRDataFrame.h:350
TRDataFrame & operator=(TRDataFrame obj)
Definition: TRDataFrame.h:375
const char * Data() const
Definition: TString.h:349
Rcpp::DataFrame df
Definition: TRDataFrame.h:187
#define ClassDef(name, id)
Definition: Rtypes.h:254
std::list< Elem > List
Definition: ModulekNN.h:107
Binding operator=(T var)
template method for operator assignation
Definition: TRDataFrame.h:208
Binding(const Binding &obj)
Copy constructor for Binding nestead class.
Definition: TRDataFrame.h:203
This is a class to pass functions from ROOT to R.
TRDataFrame & operator=(SEXP obj)
Definition: TRDataFrame.h:381
int GetNcols()
Method to get the number of colunms.
Definition: TRDataFrame.h:401
std::vector< TString > TVectorString
Definition: RExports.h:65
double f(double x)
Binding & operator>>(T &var)
Template method for operator >> that lets to use dataframes like streams example: df["v"]>>vector;...
Definition: TRDataFrame.h:282
void DataFrame()
Definition: DataFrame.C:9
#define name(a, b)
Definition: linkTestLib0.cpp:5
Mother of all ROOT objects.
Definition: TObject.h:58
int GetNrows()
Method to get the number of rows.
Definition: TRDataFrame.h:409
Rcpp::internal::NamedPlaceHolder Label
Definition: RExports.cxx:14
TObject * obj
TRDataFrame(const Rcpp::DataFrame &_df)
TDataFrame constructor for Rcpp::DataFrame.
Definition: TRDataFrame.h:363
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28
TRDataFrame & operator=(TRDataFrame &obj)
Definition: TRDataFrame.h:369
This is a class to create DataFrames from ROOT to R
Definition: TRDataFrame.h:183