Merge branch 'devel' of github.com:HLT-ISTI/QuaPy into devel

This commit is contained in:
Alejandro Moreo Fernandez 2024-11-29 10:57:14 +01:00
commit 1c733f3d77
13 changed files with 595 additions and 21 deletions

View File

@@ -1,3 +1,12 @@
Change Log 0.1.10
-----------------
- Added (aggregative) bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex, or
ellipses in the CLR space). This method is efficient as it leverages the two phases of aggregative quantifiers,
applying resampling only to the aggregation phase, thus avoiding the need to train many quantifiers or to
classify the instances of a sample multiple times. See the new example no. 15.
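A minimal usage sketch (see example no. 15 for the full version; `train` and `test_instances` are placeholders
for a labelled training collection and a set of test instances):
    from quapy.method.aggregative import PACC
    from quapy.method.confidence import AggregativeBootstrap
    pacc = AggregativeBootstrap(PACC(), confidence_level=0.95)
    pacc.fit(train)  # the underlying classifier is trained only once
    prev_estimate, conf_region = pacc.quantify_conf(test_instances)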
Change Log 0.1.9
----------------

View File

@@ -1,3 +1,6 @@
- [TODO] adapt BayesianCC to WithConfidence interface
- [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
- [TODO] Add EDy (an implementation is available at quantificationlib)
- [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
- [TODO] add HistNetQ
- [TODO] add CDE-iteration and Bayes-CDE methods

View File

@@ -33,10 +33,8 @@ import quapy.functional as F # <- this module has some functional utilities, li
print(f'training prevalence = {F.strprev(train.prevalence())}')
# let us train one quantifier, for example, PACC using a sklearn's Logistic Regressor as the underlying classifier
# classifier = LogisticRegression()
# pacc = qp.method.aggregative.PACC(classifier)
pacc = qp.method.aggregative.PACC()
classifier = LogisticRegression()
pacc = qp.method.aggregative.PACC(classifier)
print(f'training {pacc}')
pacc.fit(train)

View File

@@ -0,0 +1,78 @@
from quapy.method.confidence import AggregativeBootstrap
from quapy.method.aggregative import PACC
import quapy.functional as F
import quapy as qp
"""
Just like any other type of estimator, quantifier predictions are affected by error. It is therefore useful to provide,
along with the point estimate (the class prevalence values), a measure of uncertainty. This typically comes in the
form of credible regions around the point estimate.
QuaPy implements a method for deriving confidence regions around point estimates of class prevalence based on bootstrap.
The bootstrap method amounts to resampling the population several times, thus generating a series of point estimates.
QuaPy provides a variant of bootstrap for aggregative quantifiers that applies resampling only to the pre-classified
instances.
Let us see an example:
"""
# load some data
data = qp.datasets.fetch_UCIMulticlassDataset('molecular')
train, test = data.train_test
# by simply wrapping an aggregative quantifier within the AggregativeBootstrap class, we can obtain confidence
# intervals around the point estimate, in this case at 95% confidence
pacc = AggregativeBootstrap(PACC(), confidence_level=0.95)
with qp.util.temp_seed(0):
# we train the quantifier the usual way
pacc.fit(train)
# let us simulate some shift in the test data
random_prevalence = F.uniform_prevalence_sampling(n_classes=test.n_classes)
shifted_test = test.sampling(200, *random_prevalence)
true_prev = shifted_test.prevalence()
# by calling "quantify_conf", we obtain the point estimate and the confidence intervals around it
pred_prev, conf_intervals = pacc.quantify_conf(shifted_test.X)
# conf_intervals is an instance of ConfidenceRegionABC, which provides some useful utilities like:
# - coverage: a function that computes the fraction of true values that belong to the confidence region
# - simplex_portion: estimates the proportion of the simplex covered by the confidence region (its amplitude)
# ideally, we are interested in obtaining confidence regions with high coverage and small amplitude
# the point estimate is computed as the mean of all bootstrap predictions; let us see the prediction error
error = qp.error.ae(true_prev, pred_prev)
# some useful outputs
print(f'train prevalence: {F.strprev(train.prevalence())}')
print(f'test prevalence: {F.strprev(true_prev)}')
print(f'point-estimate: {F.strprev(pred_prev)}')
print(f'absolute error: {error:.3f}')
print(f'Is the true value in the confidence region?: {conf_intervals.coverage(true_prev)==1}')
print(f'Proportion of simplex covered at {pacc.confidence_level*100:.1f}%: {conf_intervals.simplex_portion()*100:.2f}%')
"""
Final remarks:
There are various ways of performing bootstrap:
- the population-based approach (default): performs resampling of the test instances
e.g., use AggregativeBootstrap(PACC(), n_train_samples=1, n_test_samples=100, confidence_level=0.95)
- the model-based approach: performs resampling of the training instances, thus training several quantifiers;
e.g., use AggregativeBootstrap(PACC(), n_train_samples=100, n_test_samples=1, confidence_level=0.95)
this implementation avoids retraining the classifier, applying resampling only to train different aggregation functions
- the combined approach: a combination of the above
e.g., use AggregativeBootstrap(PACC(), n_train_samples=100, n_test_samples=100, confidence_level=0.95)
this example will generate 100 x 100 predictions
QuaPy implements different ways of constructing confidence regions:
- confidence intervals: the simplest method, and one that typically works well in practice
use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='intervals')
- confidence ellipse in the simplex: creates an ellipse around the point estimate that lies on the probability simplex
use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='ellipse')
- confidence ellipse in the Centered Log-Ratio (CLR) space: creates an ellipse in the CLR space (convenient for
taking the inner structure of the probability simplex into account)
use: AggregativeBootstrap(PACC(), confidence_level=0.95, method='ellipse-clr')
"""

View File

@@ -0,0 +1,38 @@
"""
Imagine we want to generate many samples out of a collection and distribute them, so that others can run their
own experiments on the very same test samples. One naive solution would be to apply a given protocol to
our collection (say, the artificial prevalence protocol on the 'academic-success' UCI dataset), store all those
samples on disk, and make them available online. However, distributing that many samples is undesirable.
In this example, we instead generate the indexes that allow anyone to regenerate the samples out of the original collection.
"""
import quapy as qp
from quapy.method.aggregative import PACC
from quapy.protocol import UPP
data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
train, test = data.train_test
# let us train a quantifier to check whether we can actually replicate the results
quantifier = PACC()
quantifier.fit(train)
# let us simulate our experimental results
protocol = UPP(test, sample_size=100, repeats=100, random_state=0)
our_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
print(f'We have obtained a MAE={our_mae:.3f}')
# let us distribute the indexes; we specify that we want the indexes, not the samples
protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
indexes = protocol.samples_parameters()
# Imagine we distribute the indexes; now we show how to replicate our experiments.
from quapy.protocol import ProtocolFromIndex
data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
train, test = data.train_test
protocol = ProtocolFromIndex(data=test, indexes=indexes)
their_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
print(f'Another lab obtains a MAE={their_mae:.3f}')
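# The indexes still need to be shipped somehow; a minimal sketch (hypothetical file name) using pickle,
# assuming the indexes returned by the protocol can be serialized as-is
import pickle
with open('academic-success-indexes.pkl', 'wb') as foo:
    pickle.dump(indexes, foo)
with open('academic-success-indexes.pkl', 'rb') as foo:
    indexes = pickle.load(foo)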

examples/ensembles.py Normal file
View File

@@ -0,0 +1,36 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
from quapy.protocol import UPP
from quapy.method.aggregative import PACC, DMy, EMQ, KDEyML
from quapy.method.meta import SCMQ
qp.environ["SAMPLE_SIZE"]=100
def train_and_test_model(quantifier, train, test):
quantifier.fit(train)
report = qp.evaluation.evaluation_report(quantifier, UPP(test), error_metrics=['mae', 'mrae'])
print(quantifier.__class__.__name__)
print(report.mean(numeric_only=True))
quantifiers = [
PACC(),
DMy(),
EMQ(),
KDEyML()
]
classifier = LogisticRegression()
dataset_name = qp.datasets.UCI_MULTICLASS_DATASETS[0]
data = qp.datasets.fetch_UCIMulticlassDataset(dataset_name)
train, test = data.train_test
scmq = SCMQ(classifier, quantifiers)
train_and_test_model(scmq, train, test)
for quantifier in quantifiers:
train_and_test_model(quantifier, train, test)

View File

@@ -14,7 +14,7 @@ from . import model_selection
from . import classification
import os
__version__ = '0.1.9'
__version__ = '0.1.10'
environ = {
'SAMPLE_SIZE': None,

View File

@@ -298,6 +298,31 @@ def nmd(prevs, prevs_hat):
return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))
def bias_binary(prevs, prevs_hat):
"""
Computes the (positive) bias in a binary problem. The bias is simply the difference between the
predicted and the true prevalence of the positive class, so that a positive value indicates that the
prediction tends to overestimate the true value, and a negative value indicates underestimation:
:math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:return: binary bias
"""
assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
return prevs_hat[...,1]-prevs[...,1]
def mean_bias_binary(prevs, prevs_hat):
"""
Computes the mean of the (positive) bias in a binary problem.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: mean binary bias
"""
return np.mean(bias_binary(prevs, prevs_hat))
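# a quick illustration of the sign convention (hypothetical values, not part of the module):
# >>> import numpy as np
# >>> bias_binary(np.asarray([[0.8, 0.2], [0.5, 0.5]]), np.asarray([[0.7, 0.3], [0.4, 0.6]]))
# array([0.1, 0.1])   # (approximately) the positive class is overestimated by 0.1 in both samples
# >>> mean_bias_binary(np.asarray([[0.8, 0.2], [0.5, 0.5]]), np.asarray([[0.7, 0.3], [0.4, 0.6]]))
# 0.1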
def md(prevs, prevs_hat, ERROR_TOL=1E-3):
"""
Computes the Match Distance, under the assumption that the cost in mistaking class i with class i+1 is 1 in
@@ -338,8 +363,8 @@ def __check_eps(eps=None):
CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}

View File

@@ -591,7 +591,6 @@ class PACC(AggregativeSoftQuantifier):
if self.norm not in ACC.NORMALIZATIONS:
raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
"""
Estimates the misclassification rates
@@ -870,13 +869,13 @@ class BayesianCC(AggregativeCrispQuantifier):
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
"""
pred_labels, true_labels = classif_predictions.Xy
self._n_and_c_labeled = confusion_matrix(y_true=true_labels, y_pred=pred_labels, labels=self.classifier.classes_)
self._n_and_c_labeled = confusion_matrix(y_true=true_labels, y_pred=pred_labels, labels=self.classifier.classes_).astype(float)
def sample_from_posterior(self, classif_predictions):
if self._n_and_c_labeled is None:
raise ValueError("aggregation_fit must be called before sample_from_posterior")
n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_)
n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float)
self._samples = _bayesian.sample_posterior(
n_c_unlabeled=n_c_unlabeled,

quapy/method/confidence.py Normal file
View File

@@ -0,0 +1,291 @@
from functools import cached_property
import numpy as np
import quapy as qp
import quapy.functional as F
from quapy.data import LabelledCollection
from quapy.method.aggregative import AggregativeQuantifier
from scipy.stats import chi2
from scipy.special import gamma
from sklearn.utils import resample
from abc import ABC, abstractmethod
from scipy.special import softmax, factorial
import copy
from functools import lru_cache
class ConfidenceRegionABC(ABC):
@abstractmethod
def point_estimate(self) -> np.ndarray:
...
def ndim(self):
return len(self.point_estimate())
@abstractmethod
def coverage(self, true_value):
...
@lru_cache
def simplex_portion(self):
return self.montecarlo_proportion()
@lru_cache
def montecarlo_proportion(self, n_trials=10_000):
with qp.util.temp_seed(0):
uniform_simplex = F.uniform_simplex_sampling(n_classes=self.ndim(), size=n_trials)
proportion = np.clip(self.coverage(uniform_simplex), 0., 1.)
return proportion
class WithConfidenceABC(ABC):
@abstractmethod
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
...
def simplex_volume(n):
return 1 / factorial(n)
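# sanity note: the standard n-simplex {x in R^n : x_i >= 0, sum(x) <= 1} has volume 1/n!;
# e.g., a hypothetical check:
# >>> simplex_volume(2)
# 0.5  # the area of the right triangle with legs of length 1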
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
"""
Computes the proportion of `values` that fall within the ellipse with center `mean` and precision matrix
`prec_matrix`, at chi-square critical value `chi2_critical`.
:param values: a np.ndarray with shape (ndim,) or (n_values,ndim,)
:param mean: a np.ndarray with the mean of the sample
:param prec_matrix: a np.ndarray with the precision matrix (inverse of the
covariance matrix) of the sample. If this inverse cannot be computed
then None must be passed
:param chi2_critical: the chi2 critical value
:return: the fraction of values that are contained in the ellipse
defined by the mean, the precision matrix, and the chi2_critical.
If values is only one value, then either 0 (not contained) or
1 (contained) is returned.
"""
if prec_matrix is None:
return 0.
diff = values - mean # Mahalanobis distance
d_M_squared = diff @ prec_matrix @ diff.T # d_M^2
if d_M_squared.ndim == 2:
d_M_squared = np.diag(d_M_squared)
within_ellipse = (d_M_squared <= chi2_critical)
if isinstance(within_ellipse, np.ndarray):
within_ellipse = np.mean(within_ellipse)
return within_ellipse * 1.0
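# a hypothetical usage sketch: for a standard Gaussian sample, about confidence_level of the points
# should fall within the ellipse defined by the sample mean, precision matrix, and chi2 critical value
# >>> rng = np.random.default_rng(0)
# >>> X = rng.multivariate_normal(mean=[0., 0.], cov=np.eye(2), size=1000)
# >>> prec = np.linalg.inv(np.cov(X, rowvar=False))
# >>> within_ellipse_prop(X, X.mean(axis=0), prec, chi2.ppf(0.95, df=2))  # ~0.95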
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
def __init__(self, X, confidence_level=0.95):
assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
X = np.asarray(X)
self.mean_ = X.mean(axis=0)
self.cov_ = np.cov(X, rowvar=False, ddof=1)
try:
self.precision_matrix_ = np.linalg.inv(self.cov_)
except:
self.precision_matrix_ = None
self.dim = X.shape[-1]
self.ddof = self.dim - 1
# critical chi-square value
self.confidence_level = confidence_level
self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
def point_estimate(self):
return self.mean_
def coverage(self, true_value):
"""
true_value can be an array (n_dimensions,) or a matrix (n_vectors, n_dimensions,)
confidence_level None means that the confidence_level is taken from the __init__
returns true or false depending on whether true_value is in the ellipse or not,
or returns the proportion of true_values that are within the ellipse if more
than one are passed
"""
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
class ConfidenceEllipseCLR(ConfidenceRegionABC):
def __init__(self, X, confidence_level=0.95):
self.clr = CLRtransformation()
Z = self.clr(X)
self.mean_ = np.mean(X, axis=0)
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
def point_estimate(self):
# Z_mean = self.conf_region_clr.point_estimate()
# return self.clr.inverse(Z_mean)
# the inverse of the CLR of the mean does not recover the mean in the simplex, because the geometric
# mean requires smoothing the prevalence vectors, and this affects the softmax (the inverse transformation)
return self.mean_
def coverage(self, true_value):
"""
true_value can be an array (n_dimensions,) or a matrix (n_vectors, n_dimensions,)
confidence_level None means that the confidence_level is taken from the __init__
returns true or false depending on whether true_value is in the ellipse or not,
or returns the proportion of true_values that are within the ellipse if more
than one are passed
"""
transformed_values = self.clr(true_value)
return self.conf_region_clr.coverage(transformed_values)
class ConfidenceIntervals(ConfidenceRegionABC):
def __init__(self, X, confidence_level=0.95):
assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
X = np.asarray(X)
self.means_ = X.mean(axis=0)
# derive the interval bounds from the requested confidence level (e.g., the 2.5 and 97.5 percentiles for 95%)
alpha = 1. - confidence_level
self.I_low, self.I_high = np.percentile(X, q=[100 * alpha / 2., 100 * (1. - alpha / 2.)], axis=0)
def point_estimate(self):
return self.means_
def coverage(self, true_value):
"""
true_value can be an array (n_dimensions,) or a matrix (n_vectors, n_dimensions,)
returns true or false depending on whether true_value is in the ellipse or not,
or returns the proportion of true_values that are within the ellipse if more
than one are passed
"""
within_intervals = np.logical_and(self.I_low <= true_value, true_value <= self.I_high)
within_all_intervals = np.all(within_intervals, axis=-1, keepdims=True)
proportion = within_all_intervals.mean()
return proportion
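# a hypothetical sketch: intervals built from 500 bootstrap-like draws concentrated around [0.3, 0.7]
# >>> draws = np.random.default_rng(0).dirichlet(alpha=[30, 70], size=500)
# >>> ci = ConfidenceIntervals(draws, confidence_level=0.95)
# >>> ci.coverage(np.asarray([0.3, 0.7]))  # expected 1.0: the true value lies within both intervals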
class CLRtransformation:
"""
Centered log-ratio
"""
def __call__(self, X, epsilon=1e-6):
X = np.asarray(X)
X = qp.error.smooth(X, epsilon)
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G)
def inverse(self, X):
return softmax(X, axis=-1)
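# a hypothetical round-trip sketch: the CLR maps prevalence vectors to an unconstrained space whose
# coordinates sum to zero; softmax approximately inverts it (up to the smoothing applied)
# >>> clr = CLRtransformation()
# >>> z = clr(np.asarray([0.2, 0.3, 0.5]))  # z.sum() is ~0 by construction
# >>> clr.inverse(z)                        # ~array([0.2, 0.3, 0.5])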
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
def __init__(self,
quantifier: AggregativeQuantifier,
n_train_samples=1,
n_test_samples=500,
confidence_level=0.95,
method='intervals',
random_state=None):
assert isinstance(quantifier, AggregativeQuantifier), \
f'base quantifier does not seem to be an instance of {AggregativeQuantifier.__name__}'
assert n_train_samples >= 1, \
f'{n_train_samples=} must be >= 1'
assert n_test_samples >= 1, \
f'{n_test_samples=} must be >= 1'
assert n_test_samples>1 or n_train_samples>1, \
f'either {n_test_samples=} or {n_train_samples=} must be >1'
assert method in self.METHODS, \
f'unknown method; valid ones are {self.METHODS}'
self.quantifier = quantifier
self.n_train_samples = n_train_samples
self.n_test_samples = n_test_samples
self.confidence_level = confidence_level
self.method = method
self.random_state = random_state
def _return_conf(self, prevs, confidence_level):
region = None
if self.method == 'intervals':
region = ConfidenceIntervals(prevs, confidence_level=confidence_level)
elif self.method == 'ellipse':
region = ConfidenceEllipseSimplex(prevs, confidence_level=confidence_level)
elif self.method == 'ellipse-clr':
region = ConfidenceEllipseCLR(prevs, confidence_level=confidence_level)
if region is None:
raise NotImplementedError(f'unknown method {self.method}')
return region
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
self.quantifiers = []
if self.n_train_samples==1:
self.quantifier.aggregation_fit(classif_predictions, data)
self.quantifiers.append(self.quantifier)
else:
# model-based bootstrap (only on the aggregative part)
full_index = np.arange(len(data))
with qp.util.temp_seed(self.random_state):
for i in range(self.n_train_samples):
quantifier = copy.deepcopy(self.quantifier)
index = resample(full_index, n_samples=len(data))
classif_predictions_i = classif_predictions.sampling_from_index(index)
data_i = data.sampling_from_index(index)
quantifier.aggregation_fit(classif_predictions_i, data_i)
self.quantifiers.append(quantifier)
return self
def aggregate(self, classif_predictions: np.ndarray):
prev_mean, self.confidence = self.aggregate_conf(classif_predictions)
return prev_mean
def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
if confidence_level is None:
confidence_level = self.confidence_level
n_samples = classif_predictions.shape[0]
prevs = []
with qp.util.temp_seed(self.random_state):
for quantifier in self.quantifiers:
for i in range(self.n_test_samples):
sample_i = resample(classif_predictions, n_samples=n_samples)
prev_i = quantifier.aggregate(sample_i)
prevs.append(prev_i)
conf = self._return_conf(prevs, confidence_level)
prev_estim = conf.point_estimate()
return prev_estim, conf
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
self.quantifier._check_init_parameters()
classif_predictions = self.quantifier.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
self.aggregation_fit(classif_predictions, data)
return self
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
predictions = self.quantifier.classify(instances)
return self.aggregate_conf(predictions, confidence_level=confidence_level)
@property
def classifier(self):
return self.quantifier.classifier
def _classifier_method(self):
return self.quantifier._classifier_method()

View File

@@ -1,6 +1,6 @@
import itertools
from copy import deepcopy
from typing import Union
from typing import Union, List
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, make_scorer, accuracy_score
@@ -12,7 +12,7 @@ from quapy import functional as F
from quapy.data import LabelledCollection
from quapy.model_selection import GridSearchQ
from quapy.method.base import BaseQuantifier, BinaryQuantifier
from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier
from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier, AggregativeSoftQuantifier
try:
from . import _neural
@@ -691,3 +691,66 @@ def EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
"""
return ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)
class SCMQ(AggregativeSoftQuantifier):
MERGE_FUNCTIONS = ['median', 'mean']
def __init__(self, classifier, quantifiers: List[AggregativeSoftQuantifier], merge_fun='median', val_split=5):
self.classifier = classifier
self.quantifiers = quantifiers
assert merge_fun in self.MERGE_FUNCTIONS, f'unknown {merge_fun=}, valid ones are {self.MERGE_FUNCTIONS}'
self.merge_fun = merge_fun
self.val_split = val_split
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
for quantifier in self.quantifiers:
quantifier.classifier = self.classifier
quantifier.aggregation_fit(classif_predictions, data)
return self
def aggregate(self, classif_predictions: np.ndarray):
prev_predictions = []
for quantifier_i in self.quantifiers:
prevalence_i = quantifier_i.aggregate(classif_predictions)
prev_predictions.append(prevalence_i)
return self.merge(prev_predictions)
def merge(self, prev_predictions):
prev_predictions = np.asarray(prev_predictions)
if self.merge_fun == 'median':
prevalences = np.median(prev_predictions, axis=0)
# the component-wise median of valid distributions need not sum to 1, hence renormalize
prevalences = F.normalize_prevalence(prevalences, method='l1')
elif self.merge_fun == 'mean':
prevalences = np.mean(prev_predictions, axis=0)
else:
raise NotImplementedError(f'merge function {self.merge_fun} not implemented!')
return prevalences

View File

@@ -1,4 +1,6 @@
from copy import deepcopy
from typing import Iterable
import quapy as qp
import numpy as np
import itertools
@@ -62,6 +64,36 @@ class IterateProtocol(AbstractProtocol):
return len(self.samples)
class ProtocolFromIndex(AbstractProtocol):
"""
A protocol that generates samples out of a labelled collection by means of a list of precomputed indexes
:param data: a :class:`quapy.data.base.LabelledCollection`
:param indexes: a list of indexes
"""
def __init__(self, data: LabelledCollection, indexes: Iterable):
self.data = data
self.indexes = indexes
def __call__(self):
"""
Yields one sample at a time extracted using the indexes
:return: yields one tuple `(sample, prev)` at a time, where `sample` is a set of instances
and `prev` is an `np.ndarray` with the class prevalence values
"""
for index in self.indexes:
yield self.data.sampling_from_index(index).Xp
def total(self):
"""
Returns the number of samples in this protocol
:return: int
"""
return len(self.indexes)
class AbstractStochasticSeededProtocol(AbstractProtocol):
"""
An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,
@@ -124,9 +156,9 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
if self.random_state is not None:
stack.enter_context(qp.util.temp_seed(self.random_state))
for params in self.samples_parameters():
yield self.collator(self.sample(params))
yield self.collator(self.sample(params), params)
def collator(self, sample, *args):
def collator(self, sample, params):
"""
The collator prepares the sample to accommodate the desired output format before returning the output.
This collator simply returns the sample as it is. Classes inheriting from this abstract class can
@@ -191,9 +223,11 @@ class OnLabelledCollectionProtocol:
assert return_type in cls.RETURN_TYPES, \
f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}'
if return_type=='sample_prev':
return lambda lc:lc.Xp
return lambda lc,params:lc.Xp
elif return_type=='labelled_collection':
return lambda lc:lc
return lambda lc,params:lc
elif return_type=='index':
return lambda lc,params:params
class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):

View File

@@ -25,7 +25,7 @@ class ModselTestCase(unittest.TestCase):
param_grid = {'classifier__C': [0.000001, 10.]}
app = APP(validation, sample_size=100, random_state=1)
q = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, verbose=True, n_jobs=-1
).fit(training)
print('best params', q.best_params_)
print('best score', q.best_score_)
@@ -39,9 +39,9 @@
obtains the same optimal parameters
"""
q = PACC(LogisticRegression(random_state=1, max_iter=5000))
q = PACC(LogisticRegression(random_state=1, max_iter=500))
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1)
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=50).reduce(n_train=500, random_state=1)
training, validation = data.training.split_stratified(0.7, random_state=1)
param_grid = {'classifier__C': np.logspace(-3,3,7)}
@@ -50,7 +50,7 @@ class ModselTestCase(unittest.TestCase):
print('starting model selection in sequential exploration')
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=1, verbose=True
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True
).fit(training)
tend_seq = time.time()-tinit
best_c_seq = modsel.best_params_['classifier__C']
@@ -59,7 +59,7 @@
print('starting model selection in parallel exploration')
tinit = time.time()
modsel = GridSearchQ(
q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True
).fit(training)
tend_par = time.time() - tinit
best_c_par = modsel.best_params_['classifier__C']