diff --git a/examples/0.basics.py b/examples/0.basics.py
index be18109..a5ce67d 100644
--- a/examples/0.basics.py
+++ b/examples/0.basics.py
@@ -37,7 +37,7 @@ classifier = LogisticRegression()
 pacc = qp.method.aggregative.PACC(classifier)
 
 print(f'training {pacc}')
-pacc.fit(train)
+pacc.fit(X, y)
 
 # let's now test our quantifier on the test data (of course, we should not use the test labels y at this point, only X)
 X_test = test.X
diff --git a/examples/1.model_selection.py b/examples/1.model_selection.py
index 61b7087..94225df 100644
--- a/examples/1.model_selection.py
+++ b/examples/1.model_selection.py
@@ -12,9 +12,11 @@ In this example, we show how to perform model selection on a DistributionMatchin
 model = DMy()
 
 qp.environ['SAMPLE_SIZE'] = 100
+qp.environ['N_JOBS'] = -1
 print(f'running model selection with N_JOBS={qp.environ["N_JOBS"]}; '
-      f'to increase the number of jobs use:\n> N_JOBS=-1 python3 1.model_selection.py\n'
+      f'to increase/decrease the number of jobs use:\n'
+      f'> N_JOBS=-1 python3 1.model_selection.py\n'
       f'alternatively, you can set this variable within the script as:\n'
       f'import quapy as qp\n'
       f'qp.environ["N_JOBS"]=-1')
 
@@ -50,6 +52,7 @@ with qp.util.temp_seed(0):
 
     tinit = time()
 
+    Xtr, ytr = training.Xy
     model = qp.model_selection.GridSearchQ(
         model=model,
         param_grid=param_grid,
@@ -58,7 +61,7 @@ with qp.util.temp_seed(0):
         refit=False,  # retrain on the whole labelled set once done
         # raise_errors=False,
         verbose=True  # show information as the process goes on
-    ).fit(training)
+    ).fit(Xtr, ytr)
 
     tend = time()
 
diff --git a/examples/2.custom_quantifier.py b/examples/2.custom_quantifier.py
index 4f6c627..09fa71f 100644
--- a/examples/2.custom_quantifier.py
+++ b/examples/2.custom_quantifier.py
@@ -4,6 +4,7 @@ from quapy.method.base import BinaryQuantifier, BaseQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from quapy.protocol import APP
+import quapy.functional as F
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 from time import time
@@ -32,10 +33,11 @@ class MyQuantifier(BaseQuantifier):
 
     # in general, we would need to implement the method fit(self, data: LabelledCollection, fit_classifier=True,
    # val_split=None); this would amount to:
-    def fit(self, data: LabelledCollection):
-        assert data.n_classes==2, \
+    def fit(self, X, y):
+        n_classes = F.num_classes_from_labels(y)
+        assert n_classes==2, \
             'this quantifier is only valid for binary problems [abort]'
-        self.classifier.fit(*data.Xy)
+        self.classifier.fit(X, y)
         return self
 
     # in general, we would need to implement the method quantify(self, instances); this would amount to:
@@ -57,6 +59,7 @@ class MyQuantifier(BaseQuantifier):
 
 # of the method, now adhering to the AggregativeSoftQuantifier:
 class MyAggregativeSoftQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
+
     def __init__(self, classifier, alpha=0.5):
         # aggregative quantifiers have an internal attribute called self.classifier
         self.classifier = classifier
@@ -68,7 +71,7 @@ class MyAggregativeSoftQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
     # k-fold cross validation strategy). What remains ahead is to learn an aggregation function. In our case
     # this amounts to doing... nothing, since our method was pretty basic. BinaryQuantifier also add some
     # basic functionality for checking binary consistency.
-    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
+    def aggregation_fit(self, classif_predictions, labels):
         pass
 
     # since this method is of type aggregative, we can simply implement the method aggregate (i.e., we should
@@ -94,7 +97,7 @@ if __name__ == '__main__':
     train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
     train, val = train.split_stratified(train_prop=0.75)  # let's create a validation set for optimizing hyperparams
 
-    def test_implementation(quantifier):
+    def try_implementation(quantifier):
         class_name = quantifier.__class__.__name__
         print(f'\ntesting implementation {class_name}...')
         # model selection
@@ -104,7 +107,7 @@ if __name__ == '__main__':
             'alpha': np.linspace(0, 1, 11),  # quantifier-dependent hyperparameter
             'classifier__C': np.logspace(-2, 2, 5)  # classifier-dependent hyperparameter
         }
-        gridsearch = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=False).fit(train)
+        gridsearch = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(*train.Xy)
         t_modsel = time() - tinit
         print(f'\tmodel selection took {t_modsel:.2f}s', flush=True)
 
@@ -112,7 +115,7 @@ if __name__ == '__main__':
         optimized_model = gridsearch.best_model_
         mae = qp.evaluation.evaluate(
             optimized_model,
-            protocol=APP(test, repeats=5000, sanity_check=None),  # disable the check, we want to generate many tests!
+            protocol=APP(test, repeats=500, sanity_check=None),  # disable the check, we want to generate many tests!
             error_metric='mae',
             verbose=True)
 
@@ -121,11 +124,11 @@ if __name__ == '__main__':
 
     # define an instance of our custom quantifier and test it!
     quantifier = MyQuantifier(LogisticRegression(), alpha=0.5)
-    test_implementation(quantifier)
+    try_implementation(quantifier)
 
     # define an instance of our custom quantifier, with the second implementation, and test it!
     quantifier = MyAggregativeSoftQuantifier(LogisticRegression(), alpha=0.5)
-    test_implementation(quantifier)
+    try_implementation(quantifier)
 
     # the output should look like this:
     """
diff --git a/examples/3.custom_collection.py b/examples/3.custom_collection.py
new file mode 100644
index 0000000..e69de29
diff --git a/quapy/data/base.py b/quapy/data/base.py
index 72561e4..c22e895 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -318,6 +318,15 @@ class LabelledCollection:
         classes = np.unique(labels).sort()
         return LabelledCollection(instances, labels, classes=classes)
 
+    @property
+    def classes(self):
+        """
+        Gets an array-like with the classes used in this collection
+
+        :return: array-like
+        """
+        return self.classes_
+
     @property
     def Xy(self):
         """
diff --git a/quapy/functional.py b/quapy/functional.py
index b508d76..2e477e0 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -14,13 +14,22 @@ import numpy as np
 def classes_from_labels(labels):
     """
     Obtains a np.ndarray with the (sorted) classes
-    :param labels:
-    :return:
+    :param labels: array-like with the instances' labels
+    :return: a sorted np.ndarray with the class labels
     """
     classes = np.unique(labels)
     classes.sort()
     return classes
 
+
+def num_classes_from_labels(labels):
+    """
+    Obtains the number of classes from an array-like of instance's labels
+    :param labels: array-like with the instances' labels
+    :return: int, the number of classes
+    """
+    return len(classes_from_labels(labels))
+
 # ------------------------------------------------------------------------------------------
 # Counter utils
 # ------------------------------------------------------------------------------------------
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 75828ac..c8183f2 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -109,7 +109,7 @@ class GridSearchQ(BaseQuantifier):
 
         def job(cls_params):
             model.set_params(**cls_params)
-            predictions = model.classifier_fit_predict(self._training)
+            predictions = model.classifier_fit_predict(self._training_X, self._training_y)
             return predictions
 
         predictions, status, took = self._error_handler(job, cls_params)
@@ -123,7 +123,8 @@ class GridSearchQ(BaseQuantifier):
 
         def job(q_params):
             model.set_params(**q_params)
-            model.aggregation_fit(predictions, self._training)
+            P, y = predictions
+            model.aggregation_fit(P, y)
             score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
             return score
 
@@ -136,7 +137,7 @@ class GridSearchQ(BaseQuantifier):
 
         def job(params):
             model.set_params(**params)
-            model.fit(self._training)
+            model.fit(self._training_X, self._training_y)
             score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
             return score
 
@@ -159,17 +160,19 @@ class GridSearchQ(BaseQuantifier):
                 return False
         return True
 
-    def _compute_scores_aggregative(self, training):
+    def _compute_scores_aggregative(self, X, y):
         # break down the set of hyperparameters into two: classifier-specific, quantifier-specific
         cls_configs, q_configs = group_params(self.param_grid)
 
         # train all classifiers and get the predictions
-        self._training = training
+        self._training_X = X
+        self._training_y = y
         cls_outs = qp.util.parallel(
             self._prepare_classifier,
             cls_configs,
             seed=qp.environ.get('_R_SEED', None),
-            n_jobs=self.n_jobs
+            n_jobs=self.n_jobs,
+            asarray=False
         )
 
         # filter out classifier configurations that yielded any error
@@ -194,9 +197,10 @@ class GridSearchQ(BaseQuantifier):
 
         return aggr_outs
 
-    def _compute_scores_nonaggregative(self, training):
+    def _compute_scores_nonaggregative(self, X, y):
         configs = expand_grid(self.param_grid)
-        self._training = training
+        self._training_X = X
+        self._training_y = y
         scores = qp.util.parallel(
             self._prepare_nonaggr_model,
             configs,
@@ -211,11 +215,12 @@ class GridSearchQ(BaseQuantifier):
         else:
             self._sout(f'error={status}')
 
-    def fit(self, training: LabelledCollection):
+    def fit(self, X, y):
         """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
         the error metric.
 
-        :param training: the training set on which to optimize the hyperparameters
+        :param X: array-like, training covariates
+        :param y: array-like, labels of training data
         :return: self
         """
 
@@ -231,9 +236,9 @@ class GridSearchQ(BaseQuantifier):
         self._sout(f'starting model selection with n_jobs={self.n_jobs}')
 
         if self._break_down_fit():
-            results = self._compute_scores_aggregative(training)
+            results = self._compute_scores_aggregative(X, y)
         else:
-            results = self._compute_scores_nonaggregative(training)
+            results = self._compute_scores_nonaggregative(X, y)
 
         self.param_scores_ = {}
         self.best_score_ = None
@@ -266,7 +271,10 @@ class GridSearchQ(BaseQuantifier):
         if isinstance(self.protocol, OnLabelledCollectionProtocol):
             tinit = time()
             self._sout(f'refitting on the whole development set')
-            self.best_model_.fit(training + self.protocol.get_labelled_collection())
+            validation_collection = self.protocol.get_labelled_collection()
+            training_collection = LabelledCollection(X, y, classes=validation_collection.classes)
+            devel_collection = training_collection + validation_collection
+            self.best_model_.fit(*devel_collection.Xy)
             tend = time() - tinit
             self.refit_time_ = tend
         else:
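
For context, a minimal usage sketch (not part of the patch) of the array-based fit interface that these changes introduce; it reuses the imdb reviews dataset and the PACC quantifier already shown in examples/0.basics.py:

# usage sketch, not part of the diff: quantifiers are now trained sklearn-style on (X, y)
import quapy as qp
from sklearn.linear_model import LogisticRegression

# same dataset used in examples/0.basics.py and examples/2.custom_quantifier.py
train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

# train on raw covariates and labels instead of passing a LabelledCollection
X, y = train.Xy
pacc = qp.method.aggregative.PACC(LogisticRegression())
pacc.fit(X, y)

# quantification (prevalence estimation) still only needs the test covariates
estim_prevalence = pacc.quantify(test.X)
print(estim_prevalence)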