diff --git a/TODO.txt b/TODO.txt
index 0e3b6af..cbc7a9f 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,4 +1,5 @@
 Adapt examples; remaining: example 4-onwards
+not working: 4, 4b, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 
 Add 'platt' to calib options in EMQ?
 
@@ -8,7 +9,7 @@
 Update READMEs, wiki, & examples for new fit-predict interface
 
 Add the fix suggested by Alexander:
-For a more general application, I would maybe first stablish a per-class threshold value of plausible prevalence
+For a more general application, I would maybe first establish a per-class threshold value of plausible prevalence
 based on the number of actual positives and the required sample size; e.g., for sample_size=100 and actual positives
 [10, 100, 500] -> [0.1, 1.0, 1.0], meaning that class 0 can be sampled at most at 0.1 prevalence, while the others
 can be sampled up to 1. prevalence. Then, when a prevalence value is requested, e.g., [0.33, 0.33, 0.33],
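The suggestion quoted in the TODO above is truncated, but the first part is concrete enough to sketch. A minimal, illustrative rendering of the per-class threshold idea follows; the clip-and-renormalize step is one plausible completion of the cut-off sentence, and all names are hypothetical, not QuaPy API:

    import numpy as np

    def max_plausible_prevalence(n_positives, sample_size):
        # illustrative helper, not QuaPy API:
        # sample_size=100 and actual positives [10, 100, 500] -> caps [0.1, 1.0, 1.0]
        return np.minimum(np.asarray(n_positives) / sample_size, 1.0)

    def clip_and_renormalize(requested, caps):
        # clip each requested prevalence to its per-class cap, then renormalize to sum to 1
        # (an assumed completion of the truncated suggestion)
        clipped = np.minimum(np.asarray(requested), caps)
        return clipped / clipped.sum()

    caps = max_plausible_prevalence([10, 100, 500], sample_size=100)  # [0.1, 1.0, 1.0]
    print(clip_and_renormalize([0.33, 0.33, 0.33], caps))             # -> [0.132, 0.434, 0.434]
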
""" param_grid = { 'binary_quantifier__classifier__loss': ['q', 'kld', 'mae'], # classifier-dependent hyperparameter diff --git a/examples/6.quanet_example.py b/examples/6.quanet_example.py index 55f065b..bbcad5d 100644 --- a/examples/6.quanet_example.py +++ b/examples/6.quanet_example.py @@ -20,11 +20,10 @@ train, test = dataset.train_test # train the text classifier: cnn_module = CNNnet(dataset.vocabulary_size, dataset.training.n_classes) cnn_classifier = NeuralClassifierTrainer(cnn_module, device='cuda') -cnn_classifier.fit(*dataset.training.Xy) # train QuaNet (alternatively, we can set fit_classifier=True and let QuaNet train the classifier) quantifier = QuaNet(cnn_classifier, device='cuda') -quantifier.fit(train, fit_classifier=False) +quantifier.fit(*train.Xy) # prediction and evaluation estim_prevalence = quantifier.predict(test.instances) diff --git a/examples/7.uci_experiments.py b/examples/7.uci_experiments.py index 0623d14..b565ab2 100644 --- a/examples/7.uci_experiments.py +++ b/examples/7.uci_experiments.py @@ -50,7 +50,7 @@ def quantification_models(): yield 'MAX', MAX(newLR()), lr_params yield 'MS', MS(newLR()), lr_params yield 'MS2', MS2(newLR()), lr_params - yield 'sldc', EMQ(newLR(), calib='platt'), lr_params + yield 'sldc', EMQ(newLR()), lr_params yield 'svmmae', newSVMAE(), svmperf_params yield 'hdy', HDy(newLR()), lr_params @@ -98,8 +98,8 @@ def run(experiment): print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5') # model selection (hyperparameter optimization for a quantification-oriented loss) train, test = data.train_test - train, val = train.split_stratified() if hyperparams is not None: + train, val = train.split_stratified() model_selection = qp.model_selection.GridSearchQ( deepcopy(model), param_grid=hyperparams, @@ -109,11 +109,11 @@ def run(experiment): timeout=60*60, verbose=True ) - model_selection.fit(train) + model_selection.fit(*train.Xy) model = model_selection.best_model() best_params = model_selection.best_params_ else: - model.fit(data.training) + model.fit(*train.Xy) best_params = {} # model evaluation @@ -121,19 +121,19 @@ def run(experiment): model, protocol=APP(test, n_prevalences=21, repeats=100) ) - test_true_prevalence = data.test.prevalence() + test_true_prevalence = test.prevalence() evaluate_experiment(true_prevalences, estim_prevalences) save_results(dataset_name, model_name, run, optim_loss, true_prevalences, estim_prevalences, - data.training.prevalence(), test_true_prevalence, + train.prevalence(), test_true_prevalence, best_params) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') - parser.add_argument('results', metavar='RESULT_PATH', type=str, - help='path to the directory where to store the results') + parser.add_argument('--results', metavar='RESULT_PATH', type=str, + help='path to the directory where to store the results', default='./uci_results') parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification', help='path to the directory with svmperf') parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint', diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 055aee4..9b85650 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -193,7 +193,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC): """ Trains the aggregation function. 
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 055aee4..9b85650 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -193,7 +193,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
         """
         Trains the aggregation function.
 
-        :param classif_predictions: array-like with the classification predictions
+        :param classif_predictions: array-like with the classification predictions (whatever the method :meth:`classify` returns)
         :param labels: array-like with the true labels associated to each classifier prediction
         """
 
@@ -1401,7 +1401,7 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier):
         """
         If the base quantifier is not probabilistic, returns a matrix of shape `(n,m,)` with `n` the number of
         instances and `m` the number of classes. The entry `(i,j)` is a binary value indicating whether instance
-        `i `belongs to class `j`. The binary classifications are independent of each other, meaning that an instance
+        `i` belongs to class `j`. The binary classifications are independent of each other, meaning that an instance
         can end up be attributed to 0, 1, or more classes.
         If the base quantifier is probabilistic, returns a matrix of shape `(n,m,2)` with `n` the number of instances
         and `m` the number of classes. The entry `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the
@@ -1422,6 +1422,10 @@
         prevalences = self._parallel(self._delayed_binary_aggregate, classif_predictions)
         return F.normalize_prevalence(prevalences)
 
+    def aggregation_fit(self, classif_predictions, labels):
+        self._parallel(self._delayed_binary_aggregate_fit, classif_predictions, labels)
+        return self
+
     def _delayed_binary_classification(self, c, X):
         return self.dict_binary_quantifiers[c].classify(X)
 
@@ -1429,6 +1433,10 @@ def _delayed_binary_aggregate(self, c, classif_predictions):
         # the estimation for the positive class prevalence
         return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1]
 
+    def _delayed_binary_aggregate_fit(self, c, classif_predictions, labels):
+        # trains the aggregation function of the c-th binary quantifier on binarized labels
+        return self.dict_binary_quantifiers[c].aggregation_fit(classif_predictions[:, c], labels == c)
+
 
 class AggregativeMedianEstimator(BinaryQuantifier):
     """
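Note the dispatch contract that the new aggregation_fit relies on: _parallel (defined on OneVsAllGeneric, see the base.py diff below) receives a callable and fans it out over the classes, injecting each class index c itself, so the caller must pass the function rather than call it. A simplified standalone rendering of that contract, just to show the call shape; the free-standing _parallel here is an illustration, the real one is a method:

    from joblib import Parallel, delayed

    def _parallel(func, classes, *args, n_jobs=-1):
        # one delayed call per class; c is supplied by the dispatcher, not the caller
        return Parallel(n_jobs=n_jobs, backend='threading')(
            delayed(func)(c, *args) for c in classes
        )

    # hence: self._parallel(self._delayed_binary_aggregate_fit, classif_predictions, labels)
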
diff --git a/quapy/method/base.py b/quapy/method/base.py
index 1d7ad34..85a0525 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -89,18 +89,18 @@ class OneVsAllGeneric(OneVsAll, BaseQuantifier):
         self.binary_quantifier = binary_quantifier
         self.n_jobs = qp._get_njobs(n_jobs)
 
-    def fit(self, data: LabelledCollection, fit_classifier=True):
-        assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
-        assert fit_classifier == True, 'fit_classifier must be True'
+    def fit(self, X, y):
+        self.classes = sorted(np.unique(y))
+        assert len(self.classes) != 2, f'{self.__class__.__name__} expects non-binary data'
 
-        self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
-        self._parallel(self._delayed_binary_fit, data)
+        self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in self.classes}
+        self._parallel(self._delayed_binary_fit, X, y)
         return self
 
     def _parallel(self, func, *args, **kwargs):
         return np.asarray(
             Parallel(n_jobs=self.n_jobs, backend='threading')(
-                delayed(func)(c, *args, **kwargs) for c in self.classes_
+                delayed(func)(c, *args, **kwargs) for c in self.classes
             )
         )
@@ -108,13 +108,13 @@
         prevalences = self._parallel(self._delayed_binary_predict, X)
         return qp.functional.normalize_prevalence(prevalences)
 
-    @property
-    def classes_(self):
-        return sorted(self.dict_binary_quantifiers.keys())
+    # @property
+    # def classes_(self):
+    #     return sorted(self.dict_binary_quantifiers.keys())
 
     def _delayed_binary_predict(self, c, X):
         return self.dict_binary_quantifiers[c].predict(X)[1]
 
-    def _delayed_binary_fit(self, c, data):
-        bindata = LabelledCollection(data.instances, data.labels == c, classes=[False, True])
-        self.dict_binary_quantifiers[c].fit(bindata)
+    def _delayed_binary_fit(self, c, X, y):
+        bindata = LabelledCollection(X, y == c, classes=[False, True])
+        self.dict_binary_quantifiers[c].fit(*bindata.Xy)
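The one-vs-all reduction in _delayed_binary_fit above turns one multiclass problem into one binary problem per class, with boolean labels y == c. A tiny self-contained illustration of just that label transformation; the data is made up:

    import numpy as np

    y = np.array([0, 2, 1, 2, 0])       # illustrative multiclass labels
    for c in sorted(np.unique(y)):
        y_bin = (y == c)                # boolean labels for the c-th binary quantifier
        print(c, y_bin.astype(int))
    # 0 [1 0 0 0 1]
    # 1 [0 0 1 0 0]
    # 2 [0 1 0 1 0]
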