bugfix in protocols, return_type='index' not working
parent 24c28edfd9
commit e6ae1e7d77
TODO.txt
@@ -0,0 +1,3 @@
+- Test the return_type="index" in protocols and finish the "distributin_samples.py" example
+- Add EDy (an implementation is available at quantificationlib)
+-
@@ -33,10 +33,8 @@ import quapy.functional as F  # <- this module has some functional utilities, li
 print(f'training prevalence = {F.strprev(train.prevalence())}')

 # let us train one quantifier, for example, PACC using a sklearn's Logistic Regressor as the underlying classifier
-# classifier = LogisticRegression()
-# pacc = qp.method.aggregative.PACC(classifier)
-pacc = qp.method.aggregative.PACC()
+classifier = LogisticRegression()
+pacc = qp.method.aggregative.PACC(classifier)

 print(f'training {pacc}')
 pacc.fit(train)
@@ -0,0 +1,38 @@
+"""
+Imagine we want to generate many samples out of a collection that we want to distribute, so that others can run their
+own experiments on the very same test samples. One naive solution would come down to applying a given protocol to
+our collection (say, the artificial prevalence protocol on the 'academic-success' UCI dataset), storing all those
+samples on disk, and making them available online. Distributing that many samples is undesirable.
+In this example, we instead generate the indexes that allow anyone to regenerate the samples out of the original collection.
+"""
+
+import quapy as qp
+from quapy.method.aggregative import PACC
+from quapy.protocol import UPP
+
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+
+# let us train a quantifier to check whether we can actually replicate the results
+quantifier = PACC()
+quantifier.fit(train)
+
+# let us simulate our experimental results
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0)
+our_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'We have obtained a MAE={our_mae:.3f}')
+
+# let us generate the indexes to distribute; we specify that we want the indexes, not the samples
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
+indexes = protocol.samples_parameters()
+
+# Imagine we have distributed the indexes; we now show how anyone can replicate our experiments
+from quapy.protocol import ProtocolFromIndex
+
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+protocol = ProtocolFromIndex(data=test, indexes=indexes)
+their_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'Another lab obtains a MAE={their_mae:.3f}')
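As a usage note, here is a minimal sketch of how the generated indexes could actually be distributed; the file name and the use of pickle are illustrative assumptions, not part of the commit, and `indexes` is the variable produced in the example above.

# hypothetical persistence step, for illustration only
import pickle

with open('upp_indexes.pkl', 'wb') as fout:   # file name is an assumption
    pickle.dump(indexes, fout)

# ...and, on the receiving side, before building the ProtocolFromIndex:
with open('upp_indexes.pkl', 'rb') as fin:
    indexes = pickle.load(fin)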
@@ -298,6 +298,31 @@ def nmd(prevs, prevs_hat):
     return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))


+def bias_binary(prevs, prevs_hat):
+    """
+    Computes the (positive) bias in a binary problem. The bias is simply the difference between the
+    predicted positive value and the true positive value, so that a positive value indicates that the
+    prediction tends to overestimate the true value (positive bias), and a negative value that it tends
+    to underestimate it.
+    :math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+        prevalence values
+    :return: binary bias
+    """
+    assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
+    return prevs_hat[..., 1] - prevs[..., 1]
+
+
+def mean_bias_binary(prevs, prevs_hat):
+    """
+    Computes the mean of the (positive) bias in a binary problem.
+
+    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+    :return: mean binary bias
+    """
+    return np.mean(bias_binary(prevs, prevs_hat))
+
+
 def md(prevs, prevs_hat, ERROR_TOL=1E-3):
     """
     Computes the Match Distance, under the assumption that the cost in mistaking class i with class i+1 is 1 in
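A minimal usage sketch of the new functions with made-up prevalence values; it assumes the functions live in quapy's error module (as the surrounding nmd/md functions suggest), so they are reachable as qp.error.bias_binary.

import numpy as np
import quapy as qp

true_prevs = np.asarray([[0.8, 0.2], [0.5, 0.5], [0.3, 0.7]])
pred_prevs = np.asarray([[0.7, 0.3], [0.4, 0.6], [0.3, 0.7]])

# signed difference between predicted and true positive prevalence, per sample
print(qp.error.bias_binary(true_prevs, pred_prevs))       # [0.1 0.1 0. ]
print(qp.error.mean_bias_binary(true_prevs, pred_prevs))  # approx 0.067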
@@ -338,8 +363,8 @@ def __check_eps(eps=None):


 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
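A small sketch of what this registration implies, assuming the module is importable as quapy.error: the new metric name becomes discoverable through the derived *_NAMES set shown in the context lines above.

import quapy as qp

# the new mean metric should now be listed among the known quantification errors
print('mean_bias_binary' in qp.error.QUANTIFICATION_ERROR_NAMES)  # expected: True after this change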
@@ -1,4 +1,6 @@
 from copy import deepcopy
+from typing import Iterable
+
 import quapy as qp
 import numpy as np
 import itertools
@@ -62,6 +64,36 @@ class IterateProtocol(AbstractProtocol):
         return len(self.samples)


+class ProtocolFromIndex(AbstractProtocol):
+    """
+    A protocol that generates samples out of a :class:`quapy.data.base.LabelledCollection` from a given list of indexes
+
+    :param data: a :class:`quapy.data.base.LabelledCollection`
+    :param indexes: a list of indexes
+    """
+    def __init__(self, data: LabelledCollection, indexes: Iterable):
+        self.data = data
+        self.indexes = indexes
+
+    def __call__(self):
+        """
+        Yields one sample at a time, extracted using the indexes
+
+        :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances
+            and `prev` is an `np.ndarray` with the class prevalence values
+        """
+        for index in self.indexes:
+            yield self.data.sampling_from_index(index).Xp
+
+    def total(self):
+        """
+        Returns the number of samples in this protocol
+
+        :return: int
+        """
+        return len(self.indexes)
+
+
 class AbstractStochasticSeededProtocol(AbstractProtocol):
     """
     An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,
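A minimal sketch of how the new protocol is iterated; `test` and `indexes` are assumed to be the LabelledCollection and the distributed indexes from the example above.

protocol = ProtocolFromIndex(data=test, indexes=indexes)
print(f'the protocol will generate {protocol.total()} samples')
for instances, prev in protocol():
    ...  # quantify 'instances' and compare the estimate against the true prevalence 'prev'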
@@ -124,9 +156,9 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
             if self.random_state is not None:
                 stack.enter_context(qp.util.temp_seed(self.random_state))
             for params in self.samples_parameters():
-                yield self.collator(self.sample(params))
+                yield self.collator(self.sample(params), params)

-    def collator(self, sample, *args):
+    def collator(self, sample, params):
         """
         The collator prepares the sample to accommodate the desired output format before returning the output.
         This collator simply returns the sample as it is. Classes inheriting from this abstract class can
@@ -191,9 +223,11 @@ class OnLabelledCollectionProtocol:
         assert return_type in cls.RETURN_TYPES, \
             f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}'
         if return_type=='sample_prev':
-            return lambda lc:lc.Xp
+            return lambda lc,params:lc.Xp
         elif return_type=='labelled_collection':
-            return lambda lc:lc
+            return lambda lc,params:lc
+        elif return_type=='index':
+            return lambda lc,params:params


 class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
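With the collator now receiving the sampling params, requesting indexes from a protocol should work end to end. A brief sketch, reusing the imports and the `test` collection from the distributing_samples example above.

protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
for index in protocol():
    # each 'index' holds the positions sampled from 'test'; with the fix, the collator
    # receives the sampling params and returns them directly
    sample = test.sampling_from_index(index)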