diff --git a/refactor/main.py b/refactor/main.py
index bab9189..d043d76 100644
--- a/refactor/main.py
+++ b/refactor/main.py
@@ -46,7 +46,6 @@ def main(args):
 
     if args.bert_embedder:
         bertEmbedder = BertGen(multilingualIndex, batch_size=4, nepochs=10, gpus=args.gpus, n_jobs=args.n_jobs)
-        bertEmbedder.transform(lX)
         embedder_list.append(bertEmbedder)
 
     # Init DocEmbedderList (i.e., first-tier learners or view generators) and metaclassifier
diff --git a/refactor/models/pl_bert.py b/refactor/models/pl_bert.py
index 67f37f4..48f5b9a 100644
--- a/refactor/models/pl_bert.py
+++ b/refactor/models/pl_bert.py
@@ -22,8 +22,7 @@ class BertModel(pl.LightningModule):
         self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
         self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-        # Language specific metrics - I am not really sure if they should be initialized
-        # independently or we can use the metrics init above... # TODO: check it
+        # Language specific metrics to compute metrics at epoch level
         self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
         self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
@@ -71,7 +70,6 @@ class BertModel(pl.LightningModule):
         langs = set(langs)
         # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
         # here we save epoch level metric values and compute them specifically for each language
-        # TODO: make this a function (reused in pl_gru epoch_end)
         res_macroF1 = {lang: [] for lang in langs}
         res_microF1 = {lang: [] for lang in langs}
         res_macroK = {lang: [] for lang in langs}
diff --git a/refactor/models/pl_gru.py b/refactor/models/pl_gru.py
index ca4f8da..eaf7304 100644
--- a/refactor/models/pl_gru.py
+++ b/refactor/models/pl_gru.py
@@ -41,8 +41,7 @@ class RecurrentModel(pl.LightningModule):
         self.macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.microK = CustomK(num_classes=output_size, average='micro', device=self.gpus)
         self.macroK = CustomK(num_classes=output_size, average='macro', device=self.gpus)
-        # Language specific metrics - I am not really sure if they should be initialized
-        # independently or we can use the metrics init above... # TODO: check it
+        # Language specific metrics to compute metrics at epoch level
         self.lang_macroF1 = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
         self.lang_microF1 = CustomF1(num_classes=output_size, average='micro', device=self.gpus)
         self.lang_macroK = CustomF1(num_classes=output_size, average='macro', device=self.gpus)
@@ -110,7 +109,6 @@ class RecurrentModel(pl.LightningModule):
     def encode(self, lX, l_pad, batch_size=128):
         """
         Returns encoded data (i.e, RNN hidden state at second feed-forward layer - linear1). Dimensionality is 512.
-        # TODO: does not run on gpu..
         :param lX:
         :param l_pad:
         :param batch_size:
@@ -167,7 +165,6 @@ class RecurrentModel(pl.LightningModule):
     def training_epoch_end(self, outputs):
         # outputs is a of n dicts of m elements, where n is equal to the number of epoch steps and m is batchsize.
         # here we save epoch level metric values and compute them specifically for each language
-        # TODO: this is horrible...
         res_macroF1 = {lang: [] for lang in self.langs}
         res_microF1 = {lang: [] for lang in self.langs}
         res_macroK = {lang: [] for lang in self.langs}
diff --git a/refactor/requirements.txt b/refactor/requirements.txt
new file mode 100644
index 0000000..4546a4a
--- /dev/null
+++ b/refactor/requirements.txt
@@ -0,0 +1,12 @@
+transformers==2.11.0
+pandas==0.25.3
+numpy==1.17.4
+joblib==0.14.0
+tqdm==4.50.2
+pytorch_lightning==1.1.2
+torch==1.3.1
+nltk==3.4.5
+scipy==1.3.3
+rdflib==4.2.2
+torchtext==0.4.0
+scikit_learn==0.24.1
diff --git a/refactor/util/pl_metrics.py b/refactor/util/pl_metrics.py
index 6781d09..9b44eb0 100644
--- a/refactor/util/pl_metrics.py
+++ b/refactor/util/pl_metrics.py
@@ -102,10 +102,10 @@ class CustomK(Metric):
         specificity, recall = 0., 0.
         absolute_negatives = self.true_negative.sum() + self.false_positive.sum()
         if absolute_negatives != 0:
-            specificity = self.true_negative.sum()/absolute_negatives  # Todo check if it is float
+            specificity = self.true_negative.sum()/absolute_negatives
         absolute_positives = self.true_positive.sum() + self.false_negative.sum()
         if absolute_positives != 0:
-            recall = self.true_positive.sum()/absolute_positives  # Todo check if it is float
+            recall = self.true_positive.sum()/absolute_positives
 
         if absolute_positives == 0:
             return 2. * specificity - 1
@@ -125,10 +125,10 @@ class CustomK(Metric):
             specificity, recall = 0., 0.
             absolute_negatives = class_tn + class_fp
             if absolute_negatives != 0:
-                specificity = class_tn / absolute_negatives  # Todo check if it is float
+                specificity = class_tn / absolute_negatives
             absolute_positives = class_tp + class_fn
             if absolute_positives != 0:
-                recall = class_tp / absolute_positives  # Todo check if it is float
+                recall = class_tp / absolute_positives
 
             if absolute_positives == 0:
                 class_specific.append(2. * specificity - 1)
diff --git a/refactor/view_generators.py b/refactor/view_generators.py
index 2d82a20..e366d7d 100644
--- a/refactor/view_generators.py
+++ b/refactor/view_generators.py
@@ -1,18 +1,19 @@
 """
 This module contains the view generators that take care of computing the view specific document embeddings:
-- VanillaFunGen (-X) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.
+- VanillaFunGen (-x) cast document representations encoded via TFIDF into posterior probabilities by means of SVM.
 
-- WordClassGen (-W): generates document representation via Word-Class-Embeddings.
+- WordClassGen (-w): generates document representation via Word-Class-Embeddings.
 Document embeddings are obtained via weighted sum of document's constituent embeddings.
 
-- MuseGen (-M):
+- MuseGen (-m):
 generates document representation via MUSE embeddings.
+Document embeddings are obtained via weighted sum of document's constituent embeddings.
 
-- RecurrentGen (-G): generates document embedding by means of a Gated Recurrent Units. The model can be
+- RecurrentGen (-g): generates document embedding by means of a Gated Recurrent Units. The model can be
 initialized with different (multilingual/aligned) word representations (e.g., MUSE, WCE, ecc.,).
 Output dimension is (n_docs, 512).
 
-- View generator (-B): generates document embedding via mBERT model.
+- View generator (-b): generates document embedding via mBERT model.
 """
 from abc import ABC, abstractmethod
 from models.learners import *
@@ -153,9 +154,6 @@ class WordClassGen(ViewGen):
 
 
 class RecurrentGen(ViewGen):
-    # TODO: save model https://forums.pytorchlightning.ai/t/how-to-save-hparams-when-not-provided-as-argument-apparently-assigning-to-hparams-is-not-recomended/339/5
-    # Problem: we are passing lPretrained to init the RecurrentModel -> incredible slow at saving (checkpoint).
-    # if we do not save it is impossible to init RecurrentModel by calling RecurrentModel.load_from_checkpoint()
     def __init__(self, multilingualIndex, pretrained_embeddings, wce, batch_size=512, nepochs=50, gpus=0,
                  n_jobs=-1, stored_path=None):
         """