adding example with pre-trained classifier

Alejandro Moreo Fernandez 2025-10-02 16:19:10 +02:00
parent 24ab704661
commit b7931cf01a
5 changed files with 148 additions and 62 deletions

View File

@ -0,0 +1,75 @@
"""
Aggregative quantifiers rely on an underlying classifier. Often, a pre-trained classifier is already available
and needs to be used as the basis of a quantification system. In such cases, the classifier should not be
retrained, but only used to issue the classification predictions that the quantifier aggregates.
In this example, we show how to instantiate a quantifier with a pre-trained classifier.
"""
from typing import List, Dict
import quapy as qp
from quapy.method.aggregative import PACC
from sklearn.base import BaseEstimator, ClassifierMixin
from transformers import pipeline
import numpy as np
import quapy.functional as F

# A scikit-learn-style wrapper for a Hugging Face pre-trained transformer
class HFTextClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, model_name='distilbert-base-uncased-finetuned-sst-2-english'):
        self.pipe = pipeline("sentiment-analysis", model=model_name)
        self.classes_ = np.asarray([0, 1])

    def fit(self, X, y=None):
        # the model is already trained; there is nothing to fit
        return self

    def _binary_decisions(self, transformer_output: List[Dict]):
        return np.array([(1 if p['label'] == 'POSITIVE' else 0) for p in transformer_output], dtype=int)

    def predict(self, X):
        X = list(map(str, X))
        preds = self.pipe(X, truncation=True)
        return self._binary_decisions(preds)

    def predict_proba(self, X):
        X = list(map(str, X))
        n_examples = len(X)
        preds = self.pipe(X, truncation=True)
        decisions = self._binary_decisions(preds)
        scores = np.array([p['score'] for p in preds], dtype=float)
        # the pipeline reports the score of the predicted label; the other class gets the complement
        probas = np.zeros(shape=(n_examples, 2), dtype=float)
        probas[np.arange(n_examples), decisions] = scores
        probas[np.arange(n_examples), 1 - decisions] = 1 - scores
        return probas

# load a sentiment dataset
dataset = qp.datasets.fetch_reviews('imdb', tfidf=False) # raw text
train, test = dataset.training, dataset.test
# instantiate a pre-trained classifier
clf = HFTextClassifier()

# Let us fit a quantifier based on our pre-trained classifier.
# Since the classifier has already been trained, the entire training set can be devoted to
# learning the aggregation function of the quantifier.
# To do so, we only need to set fit_classifier=False, as follows:
quantifier = PACC(clf, fit_classifier=False) # Probabilistic Classify & Count using a pre-trained model
print('training PACC...')
quantifier.fit(*train.Xy)
# let us simulate some shifted test data...
new_prevalence = [0.75, 0.25]
shifted_test = test.sampling(500, *new_prevalence, random_state=0)
# and do some evaluation
print('predicting with PACC...')
estim_prevalence = quantifier.predict(shifted_test.X)
print('Result:\n'+('='*20))
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'(shifted) test prevalence: {F.strprev(shifted_test.prevalence())}')
print(f'estimated prevalence: {F.strprev(estim_prevalence)}')
absolute_error = qp.error.ae(new_prevalence, estim_prevalence)
print(f'absolute error={absolute_error:.4f}')
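
As an aside (not part of this commit), a quick standalone check of the wrapper can help confirm that it behaves like a scikit-learn probabilistic classifier before plugging it into a quantifier. The following sketch reuses the clf instance and the np import from the example above; sample_texts is a hypothetical variable introduced only for illustration:

# illustrative sanity check (a sketch, not part of the committed example)
sample_texts = ["a delightful and moving film", "a complete waste of time"]
probas = clf.predict_proba(sample_texts)   # shape (2, 2); each row should sum to 1
decisions = clf.predict(sample_texts)      # hard 0/1 decisions
assert np.allclose(probas.sum(axis=1), 1.0)
assert np.all(probas.argmax(axis=1) == decisions)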

View File

@ -37,7 +37,7 @@ quantifier = EMQ(classifier=LogisticRegression(), val_split=5)
param_grid = {
'classifier__C': np.logspace(-3, 3, 7), # classifier-dependent: inverse of regularization strength
'classifier__class_weight': ['balanced', None], # classifier-dependent: weights of each class
'calib': ['bcts', None] # quantifier-dependent: recalibration method (new in v0.1.7)
'calib': ['bcts', None] # quantifier-dependent: recalibration method (new in v0.1.7)
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(Xtr, ytr)
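
For completeness (not shown in this diff), the configuration chosen by model selection can be inspected after fitting; this sketch assumes GridSearchQ exposes the best_params_ and best_score_ attributes once fit has completed, as in recent QuaPy versions:

# illustrative follow-up (a sketch, not part of the diff)
print(f'best hyperparameters: {model_selection.best_params_}')
print(f'best validation score (mrae): {model_selection.best_score_:.4f}')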

View File

@ -45,89 +45,95 @@ def acce(y_true, y_pred):
return 1. - (y_true == y_pred).mean()
def mae(prevs, prevs_hat):
def mae(prevs_true, prevs_hat):
"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:return: mean absolute error
"""
return ae(prevs, prevs_hat).mean()
return ae(prevs_true, prevs_hat).mean()
def ae(prevs, prevs_hat):
def ae(prevs_true, prevs_hat):
"""Computes the absolute error between the two prevalence vectors.
Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,
where :math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: absolute error
"""
assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}'
return abs(prevs_hat - prevs).mean(axis=-1)
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape} vs. {prevs_hat.shape}'
return abs(prevs_hat - prevs_true).mean(axis=-1)
def nae(prevs, prevs_hat):
def nae(prevs_true, prevs_hat):
"""Computes the normalized absolute error between the two prevalence vectors.
Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,
where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`
are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: normalized absolute error
"""
assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}'
return abs(prevs_hat - prevs).sum(axis=-1)/(2*(1-prevs.min(axis=-1)))
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape} vs. {prevs_hat.shape}'
return abs(prevs_hat - prevs_true).sum(axis=-1)/(2 * (1 - prevs_true.min(axis=-1)))
def mnae(prevs, prevs_hat):
def mnae(prevs_true, prevs_hat):
"""Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:return: mean normalized absolute error
"""
return nae(prevs, prevs_hat).mean()
return nae(prevs_true, prevs_hat).mean()
def mse(prevs, prevs_hat):
def mse(prevs_true, prevs_hat):
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the
true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the
predicted prevalence values
:return: mean squared error
"""
return se(prevs, prevs_hat).mean()
return se(prevs_true, prevs_hat).mean()
def se(prevs, prevs_hat):
def se(prevs_true, prevs_hat):
"""Computes the squared error between the two prevalence vectors.
Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,
where
:math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: squared error
"""
return ((prevs_hat - prevs) ** 2).mean(axis=-1)
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)
def mkld(prevs, prevs_hat, eps=None):
def mkld(prevs_true, prevs_hat, eps=None):
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
sample pairs. The distributions are smoothed using the `eps` factor
(see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true
prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
@ -137,10 +143,10 @@ def mkld(prevs, prevs_hat, eps=None):
(which has thus to be set beforehand).
:return: mean Kullback-Leibler divergence
"""
return kld(prevs, prevs_hat, eps).mean()
return kld(prevs_true, prevs_hat, eps).mean()
def kld(prevs, prevs_hat, eps=None):
def kld(prevs_true, prevs_hat, eps=None):
"""Computes the Kullback-Leibler divergence between the two prevalence distributions.
Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
is computed as
@ -149,7 +155,7 @@ def kld(prevs, prevs_hat, eps=None):
where :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
@ -158,17 +164,17 @@ def kld(prevs, prevs_hat, eps=None):
:return: Kullback-Leibler divergence between the two distributions
"""
eps = __check_eps(eps)
smooth_prevs = smooth(prevs, eps)
smooth_prevs = smooth(prevs_true, eps)
smooth_prevs_hat = smooth(prevs_hat, eps)
return (smooth_prevs*np.log(smooth_prevs/smooth_prevs_hat)).sum(axis=-1)
def mnkld(prevs, prevs_hat, eps=None):
def mnkld(prevs_true, prevs_hat, eps=None):
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)
across the sample pairs. The distributions are smoothed using the `eps` factor
(see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain
@ -177,10 +183,10 @@ def mnkld(prevs, prevs_hat, eps=None):
(which has thus to be set beforehand).
:return: mean Normalized Kullback-Leibler divergence
"""
return nkld(prevs, prevs_hat, eps).mean()
return nkld(prevs_true, prevs_hat, eps).mean()
def nkld(prevs, prevs_hat, eps=None):
def nkld(prevs_true, prevs_hat, eps=None):
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and
:math:`\\hat{p}` is computed as
@ -189,7 +195,7 @@ def nkld(prevs, prevs_hat, eps=None):
:math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions
contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample
@ -197,16 +203,16 @@ def nkld(prevs, prevs_hat, eps=None):
`SAMPLE_SIZE` (which has thus to be set beforehand).
:return: Normalized Kullback-Leibler divergence between the two distributions
"""
ekld = np.exp(kld(prevs, prevs_hat, eps))
ekld = np.exp(kld(prevs_true, prevs_hat, eps))
return 2. * ekld / (1 + ekld) - 1.
def mrae(prevs, prevs_hat, eps=None):
def mrae(prevs_true, prevs_hat, eps=None):
"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across
the sample pairs. The distributions are smoothed using the `eps` factor (see
:meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true
prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
@ -216,10 +222,10 @@ def mrae(prevs, prevs_hat, eps=None):
the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean relative absolute error
"""
return rae(prevs, prevs_hat, eps).mean()
return rae(prevs_true, prevs_hat, eps).mean()
def rae(prevs, prevs_hat, eps=None):
def rae(prevs_true, prevs_hat, eps=None):
"""Computes the absolute relative error between the two prevalence vectors.
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
is computed as
@ -228,7 +234,7 @@ def rae(prevs, prevs_hat, eps=None):
where :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. `rae` is not defined in cases in which the true distribution
contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
@ -237,12 +243,12 @@ def rae(prevs, prevs_hat, eps=None):
:return: relative absolute error
"""
eps = __check_eps(eps)
prevs = smooth(prevs, eps)
prevs_true = smooth(prevs_true, eps)
prevs_hat = smooth(prevs_hat, eps)
return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
return (abs(prevs_true - prevs_hat) / prevs_true).mean(axis=-1)
def nrae(prevs, prevs_hat, eps=None):
def nrae(prevs_true, prevs_hat, eps=None):
"""Computes the normalized absolute relative error between the two prevalence vectors.
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
is computed as
@ -252,7 +258,7 @@ def nrae(prevs, prevs_hat, eps=None):
and :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution
contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
@ -261,18 +267,18 @@ def nrae(prevs, prevs_hat, eps=None):
:return: normalized relative absolute error
"""
eps = __check_eps(eps)
prevs = smooth(prevs, eps)
prevs_true = smooth(prevs_true, eps)
prevs_hat = smooth(prevs_hat, eps)
min_p = prevs.min(axis=-1)
return (abs(prevs - prevs_hat) / prevs).sum(axis=-1)/(prevs.shape[-1]-1+(1-min_p)/min_p)
min_p = prevs_true.min(axis=-1)
return (abs(prevs_true - prevs_hat) / prevs_true).sum(axis=-1)/(prevs_true.shape[-1] - 1 + (1 - min_p) / min_p)
def mnrae(prevs, prevs_hat, eps=None):
def mnrae(prevs_true, prevs_hat, eps=None):
"""Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across
the sample pairs. The distributions are smoothed using the `eps` factor (see
:meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true
prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
@ -282,57 +288,61 @@ def mnrae(prevs, prevs_hat, eps=None):
the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean normalized relative absolute error
"""
return nrae(prevs, prevs_hat, eps).mean()
return nrae(prevs_true, prevs_hat, eps).mean()
def nmd(prevs, prevs_hat):
def nmd(prevs_true, prevs_hat):
"""
Computes the Normalized Match Distance, i.e., the Match Distance multiplied by the factor
`1/(n-1)` to guarantee that the measure ranges between 0 (best prediction) and 1 (worst prediction).
:param prevs: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the predicted prevalence values
:return: float in [0,1]
"""
n = prevs.shape[-1]
return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
n = prevs_true.shape[-1]
return (1./(n-1))*np.mean(match_distance(prevs_true, prevs_hat))
def bias_binary(prevs, prevs_hat):
def bias_binary(prevs_true, prevs_hat):
"""
Computes the (positive) bias in a binary problem. The bias is simply the difference between the
predicted and the true prevalence of the positive class, so that a positive value indicates that the
prediction overestimates the true value, and a negative value indicates that it underestimates it.
:math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`,
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:return: binary bias
"""
assert prevs.shape[-1] == 2 and prevs.shape[-1] == 2, f'bias_binary can only be applied to binary problems'
return prevs_hat[...,1]-prevs[...,1]
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
assert prevs_true.shape[-1] == 2 and prevs_hat.shape[-1] == 2, 'bias_binary can only be applied to binary problems'
return prevs_hat[...,1]-prevs_true[...,1]
def mean_bias_binary(prevs, prevs_hat):
def mean_bias_binary(prevs_true, prevs_hat):
"""
Computes the mean of the (positive) bias in a binary problem.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: mean binary bias
"""
return np.mean(bias_binary(prevs, prevs_hat))
return np.mean(bias_binary(prevs_true, prevs_hat))
def md(prevs, prevs_hat, ERROR_TOL=1E-3):
def md(prevs_true, prevs_hat, ERROR_TOL=1E-3):
"""
Computes the Match Distance, under the assumption that the cost of mistaking class i for class i+1 is 1 in
all cases.
:param prevs: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the true prevalence values
:param prevs_true: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` or `(n_instances, n_classes)` with the predicted prevalence values
:return: float
"""
P = np.cumsum(prevs, axis=-1)
P = np.cumsum(prevs_true, axis=-1)
P_hat = np.cumsum(prevs_hat, axis=-1)
assert np.all(np.isclose(P_hat[..., -1], 1.0, rtol=ERROR_TOL)), \
'arg error in match_distance: the array does not represent a valid distribution'
@ -349,6 +359,7 @@ def smooth(prevs, eps):
:param eps: smoothing factor
:return: array-like of shape `(n_classes,)` with the smoothed distribution
"""
prevs = np.asarray(prevs)
n_classes = prevs.shape[-1]
return (prevs + eps) / (eps * n_classes + 1)
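
Since this hunk renames the first argument of the error functions from prevs to prevs_true and makes them accept generic array-likes, a small worked example may help illustrate the intended call convention; the values below are illustrative only:

# illustrative usage of the (renamed) error functions; a sketch, not part of the diff
import quapy.error as err

p_true = [0.75, 0.25]  # true prevalence (plain lists are now accepted)
p_hat = [0.65, 0.35]   # estimated prevalence

print(err.ae(p_true, p_hat))                  # (|0.65-0.75| + |0.35-0.25|) / 2 = 0.10
print(err.se(p_true, p_hat))                  # (0.10**2 + 0.10**2) / 2 = 0.01
print(err.rae(p_true, p_hat, eps=1/(2*100)))  # smoothed relative absolute error for a sample size of 100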