19from matplotlib
import use
# Announce the load step, then open the training, validation and test samples.
# Each file is read through the TTree named "tree". `data_dir` is not defined
# in this excerpt; it is joined to the file name with plain string
# concatenation, so no path separator is inserted here.
print("Loading dataframes...")
df_train = ROOT.RDataFrame("tree", data_dir + "ml_dataloader_Higgs_Classification_train.root")
df_val = ROOT.RDataFrame("tree", data_dir + "ml_dataloader_Higgs_Classification_val.root")
df_test = ROOT.RDataFrame("tree", data_dir + "ml_dataloader_Higgs_Classification_test.root")
34 hidden_layers: list[int],
36 use_dropout: bool =
False,
37 use_batchnorm: bool =
True,
44 for out_dim
in hidden_layers:
63 x = self.output_layer(x)
# Passed as the `batches_in_memory=` keyword when the dataloaders are created.
batches_in_memory = 1000

# Columns used by the script. The normalisation loop iterates `columns[:-1]`,
# so the last entry ("isHiggsRef") is deliberately left un-normalised
# (presumably it is the classification target — confirm).
columns = [
    "m4l",
    "good_lep",
    "goodlep_E",
    "goodlep_eta",
    "goodlep_phi",
    "goodlep_pt",
    "goodlep_type",
    "isHiggsRef",
]

# Number of elements taken from each vector-valued column; used as the bound
# of `range(max_vec_sizes[var])` when a column is expanded element by element.
# Scalar columns ("m4l", "isHiggsRef") have no entry here.
max_vec_sizes = {
    "good_lep": 4,
    "goodlep_E": 4,
    "goodlep_eta": 4,
    "goodlep_phi": 4,
    "goodlep_pt": 4,
    "goodlep_type": 4,
}
# Progress message emitted before the per-column normalisation loop.
print("Normalizing data...")
78for var
in columns[:-1]:
91 for i
in range(max_vec_sizes[var]):
92 scalar_column = f
"{var}_{i}"
101 expr =
", ".join(f
"(({var}[{i}] - {means[i]}) / {stddevs[i]})" for i
in range(max_vec_sizes[var]))
# Progress message emitted before the dataloaders are constructed.
print("Creating dataloaders...")
111 batch_size=batch_size,
112 batches_in_memory=batches_in_memory,
113 drop_remainder=drop_remainder,
116 max_vec_sizes=max_vec_sizes,
122 batch_size=batch_size,
123 batches_in_memory=batches_in_memory,
124 drop_remainder=drop_remainder,
127 max_vec_sizes=max_vec_sizes,
133 batch_size=batch_size,
134 batches_in_memory=batches_in_memory,
135 drop_remainder=drop_remainder,
138 max_vec_sizes=max_vec_sizes,
# Widths of the hidden layers; the Classifier iterates this list as `out_dim`
# values, so two entries request two hidden layers.
hidden_layers = [60, 60]
# NOTE(review): `p=0.2` is passed while `use_dropout=False`; presumably `p` is
# a dropout probability that has no effect with dropout disabled — confirm
# against the Classifier definition. `num_features` is defined outside this
# excerpt.
model = Classifier(num_features=num_features, hidden_layers=hidden_layers, p=0.2, use_dropout=False)
155 print(f
"Epoch {epoch} summary ==> Validation loss: {val_loss:.2f}; Validation accuracy: {val_accuracy:.2f}")
# Fixed-length window of the six most recent validation losses. The training
# loop drops the oldest entry with `del last_val_losses[0]` and stops once
# `min(last_val_losses[-3:]) > max(last_val_losses[:3])`. Seeding with +inf
# keeps that comparison false for as long as any placeholder remains in the
# older half of the window.
last_val_losses = [float("inf")] * 6
# Progress message emitted before the epoch loop begins.
print("Starting training...")
163for epoch
in range(epochs):
168 outputs = model(x_train)
169 loss =
loss_fn(outputs, y_train)
183 outputs = model(x_val)
187 preds = (outputs > 0.5).float()
188 val_correct += (preds == y_val).
sum().
item()
191 avg_val_loss = val_loss / (j + 1)
193 val_accuracy = val_correct / val_total
197 del last_val_losses[0]
200 if min(last_val_losses[-3:]) > max(last_val_losses[:3]):
201 print(f
"Validation loss has not improved for 6 epochs, stopping training after {epoch + 1} epochs.")
215 outputs = model(x_test)
216 loss =
loss_fn(outputs, y_test)
221 preds = (outputs > 0.5).float()
222 test_correct += (preds == y_test).
sum().
item()
# Divide the test-loss total by `j + 1` and the number of correct predictions
# by `test_total` to obtain the reported metrics.
# NOTE(review): `j` is presumably the last batch index left behind by the test
# loop, making `j + 1` the batch count — confirm. Both divisions assume the
# test loop processed at least one batch.
avg_test_loss = test_loss / (j + 1)
test_accuracy = test_correct / test_total

print(f"Testing Loss: {avg_test_loss:.4f} Accuracy: {test_accuracy:.4f}\n")
# Confirmation messages naming the image files the plots are reported under.
print("Loss curve saved to loss_curve.png")
print("ROC curve saved to ROC_curve.png")
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
A "std::vector"-like collection of values implementing handy operations to analyse them.
static uint64_t sum(uint64_t i)