diff --git a/TODO.txt b/TODO.txt
index 3b66cd5..7e2766e 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -4,6 +4,15 @@ Documentation with sphinx
 Document methods with paper references
 unit-tests
 
+Refactor:
+==========================================
+Unify the ThresholdOptimization methods as an extension of PACC (and not of ACC); the fit methods are almost identical
+    and use a probabilistic classifier (take into account that PACC uses PCC internally, whereas the threshold methods
+    use CC instead). The block of the fit method of ACC and PACC that computes the validation estimates should be
+    unified as well...
+Rename APP NPP
+Add NPP as an option for GridSearchQ
+
 New features:
 ==========================================
 Add NAE, NRAE
@@ -21,6 +30,7 @@ Add automatic reindex of class labels in LabelledCollection (currently, class
 in OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers
 Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed
 Add random seed management to support replicability (see temp_seed in util.py).
+GridSearchQ is not truly parallelized; it only parallelizes the predictions.
 
 Improvements:
 ==========================================
@@ -34,6 +44,7 @@ We might want to think of (improving and) adding the class Tabular (it is define
 to generate tables is typically a bad idea, but in this specific case we do have pretty good control of what an
 experiment looks like. (Do we want to abstract experimental results? this could be useful not only for tables but also
 for plots).
+Add a proper logging system; currently we use print statements.
 
 Checks:
 ==========================================
diff --git a/quapy/data/base.py b/quapy/data/base.py
index ffd7e31..5234d8d 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -88,12 +88,12 @@ class LabelledCollection:
 
         return indexes_sample
 
-    # def uniform_sampling_index(self, size):
-    #     return np.random.choice(len(self), size, replace=False)
+    def uniform_sampling_index(self, size):
+        return np.random.choice(len(self), size, replace=False)
 
-    # def uniform_sampling(self, size):
-    #     unif_index = self.uniform_sampling_index(size)
-    #     return self.sampling_from_index(unif_index)
+    def uniform_sampling(self, size):
+        unif_index = self.uniform_sampling_index(size)
+        return self.sampling_from_index(unif_index)
 
     def sampling(self, size, *prevs, shuffle=True):
         prev_index = self.sampling_index(size, *prevs, shuffle=shuffle)
diff --git a/quapy/evaluation.py b/quapy/evaluation.py
index 1fb5517..ebdb537 100644
--- a/quapy/evaluation.py
+++ b/quapy/evaluation.py
@@ -1,7 +1,6 @@
 from typing import Union, Callable, Iterable
 
 import numpy as np
-from joblib import Parallel, delayed
 from tqdm import tqdm
 
 import quapy as qp
@@ -12,8 +11,7 @@ import quapy.functional as F
 import pandas as pd
 
 
-
-def artificial_sampling_prediction(
+def artificial_prevalence_prediction(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -51,7 +49,7 @@ def artificial_sampling_prediction(
 
     return _predict_from_indexes(indexes, model, test, n_jobs, verbose)
 
-def natural_sampling_prediction(
+def natural_prevalence_prediction(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -117,7 +115,7 @@ def _predict_from_indexes(
 
     return true_prevalences, estim_prevalences
 
-def artificial_sampling_report(
+def artificial_prevalence_report(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -129,13 +127,13 @@
         error_metrics:Iterable[Union[str,Callable]]='mae',
         verbose=False):
 
-    true_prevs, estim_prevs = artificial_sampling_prediction(
+    true_prevs, estim_prevs = artificial_prevalence_prediction(
         model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
 
-    return _sampling_report(true_prevs, estim_prevs, error_metrics)
+    return _prevalence_report(true_prevs, estim_prevs, error_metrics)
 
-def natural_sampling_report(
+def natural_prevalence_report(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -145,13 +143,13 @@ def natural_sampling_report(
         error_metrics:Iterable[Union[str,Callable]]='mae',
         verbose=False):
 
-    true_prevs, estim_prevs = natural_sampling_prediction(
+    true_prevs, estim_prevs = natural_prevalence_prediction(
         model, test, sample_size, n_repetitions, n_jobs, random_seed, verbose
     )
 
-    return _sampling_report(true_prevs, estim_prevs, error_metrics)
+    return _prevalence_report(true_prevs, estim_prevs, error_metrics)
 
-def _sampling_report(
+def _prevalence_report(
         true_prevs,
         estim_prevs,
         error_metrics: Iterable[Union[str, Callable]] = 'mae'):
@@ -173,7 +171,8 @@ def _sampling_report(
 
     return df
 
-def artificial_sampling_eval(
+
+def artificial_prevalence_protocol(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -190,14 +189,14 @@ def artificial_sampling_eval(
 
     assert hasattr(error_metric, '__call__'), 'invalid error function'
 
-    true_prevs, estim_prevs = artificial_sampling_prediction(
+    true_prevs, estim_prevs = artificial_prevalence_prediction(
         model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
     )
 
     return error_metric(true_prevs, estim_prevs)
 
 
-def natural_sampling_eval(
+def natural_prevalence_protocol(
         model: BaseQuantifier,
         test: LabelledCollection,
         sample_size,
@@ -212,7 +211,7 @@ def natural_sampling_eval(
 
     assert hasattr(error_metric, '__call__'), 'invalid error function'
 
-    true_prevs, estim_prevs = natural_sampling_prediction(
+    true_prevs, estim_prevs = natural_prevalence_prediction(
         model, test, sample_size, n_repetitions, n_jobs, random_seed, verbose
     )
 
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 6294e1b..151dd2e 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -513,6 +513,170 @@ class SVMRAE(ELM):
         super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
 
 
+class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        self.learner = learner
+        self.val_split = val_split
+
+    @abstractmethod
+    def optimize_threshold(self, y, probabilities):
+        ...
+
+    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None):
+        BinaryQuantifier._check_binary(data, "Threshold Optimization")
+
+        if val_split is None:
+            val_split = self.val_split
+        if isinstance(val_split, int):
+            # kFCV estimation of parameters
+            y, probabilities = [], []
+            kfcv = StratifiedKFold(n_splits=val_split)
+            pbar = tqdm(kfcv.split(*data.Xy), total=val_split)
+            for k, (training_idx, validation_idx) in enumerate(pbar):
+                pbar.set_description(f'{self.__class__.__name__} fitting fold {k}')
+                training = data.sampling_from_index(training_idx)
+                validation = data.sampling_from_index(validation_idx)
+                learner, val_data = training_helper(self.learner, training, fit_learner, val_split=validation)
+                probabilities.append(learner.predict_proba(val_data.instances))
+                y.append(val_data.labels)
+
+            y = np.concatenate(y)
+            probabilities = np.concatenate(probabilities)
+
+            # fit the learner on all data
+            self.learner, _ = training_helper(self.learner, data, fit_learner, val_split=None)
+
+        else:
+            self.learner, val_data = training_helper(self.learner, data, fit_learner, val_split=val_split)
+            probabilities = self.learner.predict_proba(val_data.instances)
+            y = val_data.labels
+
+        self.cc = CC(self.learner)
+
+        self.tpr, self.fpr = self.optimize_threshold(y, probabilities)
+
+        return self
+
+    @abstractmethod
+    def _condition(self, tpr, fpr) -> float:
+        """
+        Implements the criterion according to which the threshold should be selected.
+        This function should return a (float) score to be minimized.
+        """
+        ...
+
+    def optimize_threshold(self, y, probabilities):
+        best_candidate_threshold_score = None
+        best_tpr = 0
+        best_fpr = 0
+        candidate_thresholds = np.unique(probabilities[:, 1])
+        for candidate_threshold in candidate_thresholds:
+            y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]]
+            TP, FP, FN, TN = self.compute_table(y, y_)
+            tpr = self.compute_tpr(TP, FN)
+            fpr = self.compute_fpr(FP, TN)
+            condition_score = self._condition(tpr, fpr)
+            if best_candidate_threshold_score is None or condition_score < best_candidate_threshold_score:
+                best_candidate_threshold_score = condition_score
+                best_tpr = tpr
+                best_fpr = fpr
+
+        return best_tpr, best_fpr
+
+    def aggregate(self, classif_predictions):
+        prevs_estim = self.cc.aggregate(classif_predictions)
+        if self.tpr - self.fpr == 0:
+            return prevs_estim
+        adjusted_prevs_estim = np.clip((prevs_estim[1] - self.fpr) / (self.tpr - self.fpr), 0, 1)
+        adjusted_prevs_estim = np.array((1 - adjusted_prevs_estim, adjusted_prevs_estim))
+        return adjusted_prevs_estim
+
+    def compute_table(self, y, y_):
+        TP = np.logical_and(y == y_, y == self.classes_[1]).sum()
+        FP = np.logical_and(y != y_, y == self.classes_[0]).sum()
+        FN = np.logical_and(y != y_, y == self.classes_[1]).sum()
+        TN = np.logical_and(y == y_, y == self.classes_[0]).sum()
+        return TP, FP, FN, TN
+
+    def compute_tpr(self, TP, FN):
+        # true positive rate (recall of the positive class): TP / (TP + FN)
+        if TP + FN == 0:
+            return 0
+        return TP / (TP + FN)
+
+    def compute_fpr(self, FP, TN):
+        # false positive rate: FP / (FP + TN)
+        if FP + TN == 0:
+            return 0
+        return FP / (FP + TN)
+
+
+class T50(ThresholdOptimization):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        super().__init__(learner, val_split)
+
+    def _condition(self, tpr, fpr) -> float:
+        return abs(tpr - 0.5)
+
+
+class MAX(ThresholdOptimization):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        super().__init__(learner, val_split)
+
+    def _condition(self, tpr, fpr) -> float:
+        # MAX strives to maximize (tpr - fpr), which is equivalent to minimizing (fpr - tpr)
+        return (fpr - tpr)
+
+
+class X(ThresholdOptimization):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        super().__init__(learner, val_split)
+
+    def _condition(self, tpr, fpr) -> float:
+        return abs(1 - (tpr + fpr))
+
+
+class MS(ThresholdOptimization):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        super().__init__(learner, val_split)
+
+    def optimize_threshold(self, y, probabilities):
+        tprs = []
+        fprs = []
+        candidate_thresholds = np.unique(probabilities[:, 1])
+        for candidate_threshold in candidate_thresholds:
+            y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]]
+            TP, FP, FN, TN = self.compute_table(y, y_)
+            tpr = self.compute_tpr(TP, FN)
+            fpr = self.compute_fpr(FP, TN)
+            tprs.append(tpr)
+            fprs.append(fpr)
+        return np.median(tprs), np.median(fprs)
+
+
+class MS2(MS):
+
+    def __init__(self, learner: BaseEstimator, val_split=0.4):
+        super().__init__(learner, val_split)
+
+    def optimize_threshold(self, y, probabilities):
+        tprs = [0, 1]
+        fprs = [0, 1]
+        candidate_thresholds = np.unique(probabilities[:, 1])
+        for candidate_threshold in candidate_thresholds:
+            y_ = [self.classes_[1] if p > candidate_threshold else self.classes_[0] for p in probabilities[:, 1]]
+            TP, FP, FN, TN = self.compute_table(y, y_)
+            tpr = self.compute_tpr(TP, FN)
+            fpr = self.compute_fpr(FP, TN)
+            if (tpr - fpr) > 0.25:
+                tprs.append(tpr)
+                fprs.append(fpr)
+        return np.median(tprs), np.median(fprs)
+
+
 ClassifyAndCount = CC
 AdjustedClassifyAndCount = ACC
 ProbabilisticClassifyAndCount = PCC
@@ -520,6 +684,8 @@ ProbabilisticAdjustedClassifyAndCount = PACC
 ExpectationMaximizationQuantifier = EMQ
 HellingerDistanceY = HDy
 ExplicitLossMinimisation = ELM
+MedianSweep = MS
+MedianSweep2 = MS2
 
 
 class OneVsAll(AggregativeQuantifier):
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index f78a576..3fe9bb6 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -5,7 +5,7 @@ from typing import Union, Callable
 
 import quapy as qp
 from quapy.data.base import LabelledCollection
-from quapy.evaluation import artificial_sampling_prediction
+from quapy.evaluation import artificial_prevalence_prediction, natural_prevalence_prediction
 from quapy.method.aggregative import BaseQuantifier
 
 
@@ -15,6 +15,7 @@ class GridSearchQ(BaseQuantifier):
                 model: BaseQuantifier,
                 param_grid: dict,
                 sample_size: int,
+                protocol='app',
                 n_prevpoints: int = None,
                 n_repetitions: int = 1,
                 eval_budget: int = None,
@@ -29,15 +30,15 @@
         Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation
         protocol for quantification.
 
         :param model: the quantifier to optimize
-        :param training: the training set on which to optimize the hyperparameters
-        :param validation: either a LabelledCollection on which to test the performance of the different settings, or
-            a float in [0,1] indicating the proportion of labelled data to extract from the training set
         :param param_grid: a dictionary with keys the parameter names and values the list of values to explore for
-            that particular parameter :param sample_size: the size of the samples to extract from the validation set
+            that particular parameter
+        :param protocol: either 'app' for the artificial prevalence protocol, or 'npp' for the natural prevalence
+            protocol
         :param n_prevpoints: if specified, indicates the number of equally distant point to extract from the interval
             [0,1] in order to define the prevalences of the samples; e.g., if n_prevpoints=5, then the prevalences for
-            each class will be explored in [0.00, 0.25, 0.50, 0.75, 1.00]. If not specified, then eval_budget is requested
+            each class will be explored in [0.00, 0.25, 0.50, 0.75, 1.00]. If not specified, then eval_budget is requested.
+            Ignored if protocol='npp'.
         :param n_repetitions: the number of repetitions for each combination of prevalences. This parameter is ignored
             if eval_budget is set and is lower than the number of combinations that would be generated using the value
             assigned to n_prevpoints (for the current number of classes and n_repetitions)
@@ -45,10 +46,13 @@
            combination. For example, if there are 3 classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be
            set to 5, since this will generate 15 different prevalences: [0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ...
            [1, 0, 0]
+            Ignored if protocol='npp'.
         :param error: an error function (callable) or a string indicating the name of an error function (valid ones
            are those in qp.error.QUANTIFICATION_ERROR
         :param refit: whether or not to refit the model on the whole labelled collection (training+validation) with
            the best chosen hyperparameter combination
+        :param val_split: either a LabelledCollection on which to test the performance of the different settings, or
+            a float in [0,1] indicating the proportion of labelled data to extract from the training set
         :param n_jobs: number of parallel jobs
         :param random_seed: set the seed of the random generator to replicate experiments
         :param timeout: establishes a timer (in seconds) for each of the hyperparameters configurations being tested.
@@ -59,6 +63,7 @@
         self.model = model
         self.param_grid = param_grid
         self.sample_size = sample_size
+        self.protocol = protocol.lower()
         self.n_prevpoints = n_prevpoints
         self.n_repetitions = n_repetitions
         self.eval_budget = eval_budget
@@ -69,6 +74,19 @@
         self.timeout = timeout
         self.verbose = verbose
         self.__check_error(error)
+        assert self.protocol in {'app', 'npp'}, \
+            'unknown protocol; valid ones are "app" or "npp" for the "artificial" or the "natural" prevalence protocols'
+        if self.protocol == 'npp':
+            if self.n_repetitions is None or self.n_repetitions == 1:
+                if self.eval_budget is not None:
+                    print(f'[warning] when protocol=="npp" the parameter n_repetitions should be indicated '
+                          f'(and not eval_budget). Setting n_repetitions={self.eval_budget}...')
+                    self.n_repetitions = self.eval_budget
+                else:
+                    raise ValueError(f'when protocol=="npp" the parameter n_repetitions should be indicated '
+                                     f'(and should be >1).')
+            if self.n_prevpoints is not None:
+                print('[warning] n_prevpoints has been set along with the npp protocol, and will be ignored')
 
     def sout(self, msg):
         if self.verbose:
@@ -83,7 +101,7 @@
             return training, validation
         else:
             raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'
-                             f'proportion of training documents to extract (found) {type(validation)}')
+                             f' proportion of training documents to extract (type found: {type(validation)})')
 
     def __check_error(self, error):
         if error in qp.error.QUANTIFICATION_ERROR:
@@ -96,6 +114,27 @@
             raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
                              f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
 
+    def __generate_predictions(self, model, val_split):
+        commons = {
+            'n_repetitions': self.n_repetitions,
+            'n_jobs': self.n_jobs,
+            'random_seed': self.random_seed,
+            'verbose': False
+        }
+        if self.protocol == 'app':
+            return artificial_prevalence_prediction(
+                model, val_split, self.sample_size,
+                n_prevpoints=self.n_prevpoints,
+                eval_budget=self.eval_budget,
+                **commons
+            )
+        elif self.protocol == 'npp':
+            return natural_prevalence_prediction(
+                model, val_split, self.sample_size,
+                **commons)
+        else:
+            raise ValueError('unknown protocol')
+
     def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float] = None):
         """
         :param training: the training set on which to optimize the hyperparameters
@@ -134,16 +173,7 @@
                 # overrides default parameters with the parameters being explored at this iteration
                 model.set_params(**params)
                 model.fit(training)
-                true_prevalences, estim_prevalences = artificial_sampling_prediction(
-                    model, val_split, self.sample_size,
-                    n_prevpoints=self.n_prevpoints,
-                    n_repetitions=self.n_repetitions,
-                    eval_budget=self.eval_budget,
-                    n_jobs=n_jobs,
-                    random_seed=self.random_seed,
-                    verbose=False
-                )
-
+                true_prevalences, estim_prevalences = self.__generate_predictions(model, val_split)
                 score = self.error(true_prevalences, estim_prevalences)
                 self.sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}')
                 if self.best_score_ is None or score < self.best_score_:
@@ -173,6 +203,7 @@
         return self
 
     def quantify(self, instances):
+        assert hasattr(self, 'best_model_'), 'quantify called before fit'
        return self.best_model_.quantify(instances)
 
     @property
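
Usage note (illustrative, not part of the patch above): a minimal sketch of how the new threshold-based quantifiers and the 'npp' protocol in GridSearchQ might be used, assuming QuaPy's public API as it appears in this diff. The dataset loader call (qp.datasets.fetch_reviews), the 'kindle' dataset name, and the hyperparameter grid below are assumptions for illustration only.

# Illustrative sketch only; not part of the diff. Assumes the API shown in the patch.
import quapy as qp
from quapy.method.aggregative import MS, T50
from quapy.model_selection import GridSearchQ
from sklearn.linear_model import LogisticRegression

# assumed binary sentiment dataset loader from QuaPy (name and arguments are an assumption)
dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)

# median-sweep threshold quantifier added by this patch; val_split=0.4 reserves 40% of the
# training data for estimating tpr/fpr over the candidate thresholds
quantifier = MS(LogisticRegression(), val_split=0.4)
quantifier.fit(dataset.training)
print(quantifier.quantify(dataset.test.instances))

# model selection under the natural prevalence protocol ('npp') added to GridSearchQ;
# n_repetitions must be >1 when protocol='npp' (n_prevpoints and eval_budget are ignored)
grid = GridSearchQ(
    model=T50(LogisticRegression()),
    param_grid={'C': [0.1, 1, 10]},   # hypothetical grid, forwarded to the learner via set_params
    sample_size=500,
    protocol='npp',
    n_repetitions=100,
    error='mae',
    refit=True,
    verbose=True
)
grid.fit(dataset.training, val_split=0.4)
print(grid.best_score_)
print(grid.quantify(dataset.test.instances))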