bugfix in protocols, return_type='index' not working
parent 24c28edfd9
commit e6ae1e7d77
TODO.txt
@@ -0,0 +1,3 @@
+- Test the return_type="index" in protocols and finish the "distributin_samples.py" example
+- Add EDy (an implementation is available at quantificationlib)
+-
@@ -33,10 +33,8 @@ import quapy.functional as F  # <- this module has some functional utilities, li
 print(f'training prevalence = {F.strprev(train.prevalence())}')

 # let us train one quantifier, for example, PACC using a sklearn's Logistic Regressor as the underlying classifier
-# classifier = LogisticRegression()
-# pacc = qp.method.aggregative.PACC(classifier)
-pacc = qp.method.aggregative.PACC()
+classifier = LogisticRegression()
+pacc = qp.method.aggregative.PACC(classifier)

 print(f'training {pacc}')
 pacc.fit(train)
@@ -0,0 +1,38 @@
+"""
+Imagine we want to generate many samples out of a collection that we want to distribute, so that others can run their
+own experiments on the very same test samples. One naive solution would come down to applying a given protocol to
+our collection (say, the artificial prevalence protocol on the 'academic-success' UCI dataset), storing all those
+samples on disk, and making them available online. Distributing that many samples is undesirable.
+In this example, we instead generate the indexes that allow anyone to regenerate the samples out of the original collection.
+"""
+
+import quapy as qp
+from quapy.method.aggregative import PACC
+from quapy.protocol import UPP
+
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+
+# let us train a quantifier to check whether we can actually replicate the results
+quantifier = PACC()
+quantifier.fit(train)
+
+# let us simulate our experimental results
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0)
+our_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'We have obtained a MAE={our_mae:.3f}')
+
+# let us generate the indexes to distribute; we specify that we want the indexes, not the samples
+protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
+indexes = protocol.samples_parameters()
+
+# Imagine we have distributed the indexes; we now show how anyone can replicate our experiments
+from quapy.protocol import ProtocolFromIndex
+
+data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
+train, test = data.train_test
+protocol = ProtocolFromIndex(data=test, indexes=indexes)
+their_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')
+
+print(f'Another lab obtains a MAE={their_mae:.3f}')
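As a usage note, here is a minimal sketch of how the generated indexes could actually be distributed; the file name and the use of pickle are illustrative assumptions, not part of the commit, and `indexes` is the variable produced in the example above.

# hypothetical persistence step, for illustration only
import pickle

with open('upp_indexes.pkl', 'wb') as fout:   # file name is an assumption
    pickle.dump(indexes, fout)

# ...and, on the receiving side, before building the ProtocolFromIndex:
with open('upp_indexes.pkl', 'rb') as fin:
    indexes = pickle.load(fin)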
@@ -298,6 +298,31 @@ def nmd(prevs, prevs_hat):
     return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))


+def bias_binary(prevs, prevs_hat):
+    """
+    Computes the (positive) bias in a binary problem. The bias is simply the difference between the
+    predicted positive value and the true positive value, so that a positive value indicates that the
+    prediction tends to overestimate the true value (positive bias), and a negative value that it tends
+    to underestimate it.
+    :math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+        prevalence values
+    :return: binary bias
+    """
+    assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
+    return prevs_hat[..., 1] - prevs[..., 1]
+
+
+def mean_bias_binary(prevs, prevs_hat):
+    """
+    Computes the mean of the (positive) bias in a binary problem.
+
+    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+    :return: mean binary bias
+    """
+    return np.mean(bias_binary(prevs, prevs_hat))
+
+
 def md(prevs, prevs_hat, ERROR_TOL=1E-3):
     """
     Computes the Match Distance, under the assumption that the cost in mistaking class i with class i+1 is 1 in
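A minimal usage sketch of the new functions with made-up prevalence values; it assumes the functions live in quapy's error module (as the surrounding nmd/md functions suggest), so they are reachable as qp.error.bias_binary.

import numpy as np
import quapy as qp

true_prevs = np.asarray([[0.8, 0.2], [0.5, 0.5], [0.3, 0.7]])
pred_prevs = np.asarray([[0.7, 0.3], [0.4, 0.6], [0.3, 0.7]])

# signed difference between predicted and true positive prevalence, per sample
print(qp.error.bias_binary(true_prevs, pred_prevs))       # [0.1 0.1 0. ]
print(qp.error.mean_bias_binary(true_prevs, pred_prevs))  # approx 0.067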
@@ -338,8 +363,8 @@ def __check_eps(eps=None):


 CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
 QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
 CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
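A small sketch of what this registration implies, assuming the module is importable as quapy.error: the new metric name becomes discoverable through the derived *_NAMES set shown in the context lines above.

import quapy as qp

# the new mean metric should now be listed among the known quantification errors
print('mean_bias_binary' in qp.error.QUANTIFICATION_ERROR_NAMES)  # expected: True after this change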
@@ -1,4 +1,6 @@
 from copy import deepcopy
+from typing import Iterable
+
 import quapy as qp
 import numpy as np
 import itertools
@@ -62,6 +64,36 @@ class IterateProtocol(AbstractProtocol):
         return len(self.samples)


+class ProtocolFromIndex(AbstractProtocol):
+    """
+    A protocol that generates samples out of a :class:`quapy.data.base.LabelledCollection` from a given list of indexes
+
+    :param data: a :class:`quapy.data.base.LabelledCollection`
+    :param indexes: a list of indexes
+    """
+    def __init__(self, data: LabelledCollection, indexes: Iterable):
+        self.data = data
+        self.indexes = indexes
+
+    def __call__(self):
+        """
+        Yields one sample at a time, extracted using the indexes
+
+        :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances
+            and `prev` is an `np.ndarray` with the class prevalence values
+        """
+        for index in self.indexes:
+            yield self.data.sampling_from_index(index).Xp
+
+    def total(self):
+        """
+        Returns the number of samples in this protocol
+
+        :return: int
+        """
+        return len(self.indexes)
+
+
 class AbstractStochasticSeededProtocol(AbstractProtocol):
     """
     An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,
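A minimal sketch of how the new protocol is iterated; `test` and `indexes` are assumed to be the LabelledCollection and the distributed indexes from the example above.

protocol = ProtocolFromIndex(data=test, indexes=indexes)
print(f'the protocol will generate {protocol.total()} samples')
for instances, prev in protocol():
    ...  # quantify 'instances' and compare the estimate against the true prevalence 'prev'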
@@ -124,9 +156,9 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
             if self.random_state is not None:
                 stack.enter_context(qp.util.temp_seed(self.random_state))
             for params in self.samples_parameters():
-                yield self.collator(self.sample(params))
+                yield self.collator(self.sample(params), params)

-    def collator(self, sample, *args):
+    def collator(self, sample, params):
         """
         The collator prepares the sample to accommodate the desired output format before returning the output.
         This collator simply returns the sample as it is. Classes inheriting from this abstract class can
@@ -191,9 +223,11 @@ class OnLabelledCollectionProtocol:
         assert return_type in cls.RETURN_TYPES, \
             f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}'
         if return_type=='sample_prev':
-            return lambda lc:lc.Xp
+            return lambda lc,params:lc.Xp
         elif return_type=='labelled_collection':
-            return lambda lc:lc
+            return lambda lc,params:lc
+        elif return_type=='index':
+            return lambda lc,params:params


 class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
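With the collator now receiving the sampling params, requesting indexes from a protocol should work end to end. A brief sketch, reusing the imports and the `test` collection from the distributing_samples example above.

protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
for index in protocol():
    # each 'index' holds the positions sampled from 'test'; with the fix, the collator
    # receives the sampling params and returns them directly
    sample = test.sampling_from_index(index)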