forked from moreo/QuaPy
Added NAE, NRAE
This commit is contained in:
parent
e71f82105e
commit
69e78edbee
|
@ -76,7 +76,7 @@ See the [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki) for detailed examples.
|
||||||
* Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization,
|
* Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization,
|
||||||
quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others).
|
quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others).
|
||||||
* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.).
|
* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.).
|
||||||
* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, SE, KLD, NKLD, etc.).
|
* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.).
|
||||||
* Datasets frequently used in quantification (textual and numeric), including:
|
* Datasets frequently used in quantification (textual and numeric), including:
|
||||||
* 32 UCI Machine Learning datasets.
|
* 32 UCI Machine Learning datasets.
|
||||||
* 11 Twitter quantification-by-sentiment datasets.
|
* 11 Twitter quantification-by-sentiment datasets.
|
||||||
|
|
1
TODO.txt
1
TODO.txt
|
@ -33,7 +33,6 @@ Refactor protocols. APP and NPP related functionalities are duplicated in functi
|
||||||
|
|
||||||
New features:
|
New features:
|
||||||
==========================================
|
==========================================
|
||||||
Add NAE, NRAE
|
|
||||||
Add "measures for evaluating ordinal"?
|
Add "measures for evaluating ordinal"?
|
||||||
Add datasets for topic.
|
Add datasets for topic.
|
||||||
Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top?
|
Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top?
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
Change Log 0.1.7
|
Change Log 0.1.8
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
- New UCI multiclass datasets added (thanks to Pablo González). The 5 UCI multiclass datasets are those corresponding
|
- New UCI multiclass datasets added (thanks to Pablo González). The 5 UCI multiclass datasets are those corresponding
|
||||||
|
@ -7,6 +7,7 @@ Change Log 0.1.7
|
||||||
- >2 classes
|
- >2 classes
|
||||||
- classification datasets
|
- classification datasets
|
||||||
- Python API available
|
- Python API available
|
||||||
|
- Added NAE, NRAE
|
||||||
|
|
||||||
Change Log 0.1.7
|
Change Log 0.1.7
|
||||||
----------------
|
----------------
|
||||||
|
|
|
@ -70,6 +70,32 @@ def ae(prevs, prevs_hat):
|
||||||
return abs(prevs_hat - prevs).mean(axis=-1)
|
return abs(prevs_hat - prevs).mean(axis=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def nae(prevs, prevs_hat):
    """Computes the normalized absolute error between the two prevalence vectors.
    Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
    :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,
    where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`
    are the classes of interest.

    The computation runs along the last axis, so a batch of shape `(n_samples, n_classes)` yields
    one value per sample.

    :param prevs: array of shape `(n_classes,)` (or `(n_samples, n_classes)`) with the true prevalence values;
        must expose `.shape` and `.min` (e.g., a numpy array)
    :param prevs_hat: array of the same shape as `prevs` with the predicted prevalence values
    :return: normalized absolute error
    :raises AssertionError: if `prevs` and `prevs_hat` do not have the same shape
    """
    assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}'
    # AE is a mean over the classes and z_AE carries a 1/|Y| factor, so |Y| cancels out:
    # AE / z_AE == sum(|p_hat - p|) / (2 * (1 - min(p)))
    return abs(prevs_hat - prevs).sum(axis=-1) / (2 * (1 - prevs.min(axis=-1)))
|
||||||
|
|
||||||
|
|
||||||
|
def mnae(prevs, prevs_hat):
    """Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.

    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
        prevalence values
    :return: mean normalized absolute error
    """
    # nae yields one value per sample (it reduces the last axis); average them
    return nae(prevs, prevs_hat).mean()
|
||||||
|
|
||||||
|
|
||||||
def mse(prevs, prevs_hat):
|
def mse(prevs, prevs_hat):
|
||||||
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
|
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
|
||||||
|
|
||||||
|
@ -216,6 +242,49 @@ def rae(prevs, prevs_hat, eps=None):
|
||||||
return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
|
return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def nrae(prevs, prevs_hat, eps=None):
    """Computes the normalized relative absolute error between the two prevalence vectors.
    Normalized relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
    is computed as
    :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,
    where
    :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`
    and :math:`\\mathcal{Y}` are the classes of interest.
    The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).

    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
    :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution
        contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
        sample size. If `eps=None`, the sample size will be taken from the environment variable
        `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: normalized relative absolute error
    """
    eps = __check_eps(eps)
    prevs = smooth(prevs, eps)
    prevs_hat = smooth(prevs_hat, eps)
    # the minimum is taken from the smoothed true prevalence, the same vector used as divisor below
    min_p = prevs.min(axis=-1)
    # the 1/|Y| factors of RAE (a mean over classes) and of z_RAE cancel out, hence the plain sum
    return (abs(prevs - prevs_hat) / prevs).sum(axis=-1) / (prevs.shape[-1] - 1 + (1 - min_p) / min_p)
|
||||||
|
|
||||||
|
|
||||||
|
def mnrae(prevs, prevs_hat, eps=None):
    """Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across
    the sample pairs. The distributions are smoothed using the `eps` factor (see
    :meth:`quapy.error.smooth`).

    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true
        prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
        prevalence values
    :param eps: smoothing factor. `mnrae` is not defined in cases in which the true
        distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,
        with :math:`T` the sample size. If `eps=None`, the sample size will be taken from
        the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: mean normalized relative absolute error
    """
    # eps is forwarded untouched; nrae resolves and applies the smoothing
    return nrae(prevs, prevs_hat, eps).mean()
|
||||||
|
|
||||||
|
|
||||||
def smooth(prevs, eps):
|
def smooth(prevs, eps):
|
||||||
""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
|
""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
|
||||||
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+
|
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+
|
||||||
|
@ -239,9 +308,9 @@ def __check_eps(eps=None):
|
||||||
|
|
||||||
|
|
||||||
CLASSIFICATION_ERROR = {f1e, acce}
|
CLASSIFICATION_ERROR = {f1e, acce}
|
||||||
QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld}
|
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
|
||||||
QUANTIFICATION_ERROR_SINGLE = {ae, rae, se, kld, nkld}
|
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
|
||||||
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, mkld, mnkld, mrae}
|
# Errors that take the `eps` smoothing factor; `mnrae` forwards `eps` to `nrae`, so it is listed
# alongside `mrae`, `mkld` and `mnkld`.
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae, mnrae}
|
||||||
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
||||||
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
||||||
QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
|
QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
|
||||||
|
@ -255,3 +324,7 @@ mean_absolute_error = mae
|
||||||
absolute_error = ae
|
absolute_error = ae
|
||||||
mean_relative_absolute_error = mrae
|
mean_relative_absolute_error = mrae
|
||||||
relative_absolute_error = rae
|
relative_absolute_error = rae
|
||||||
|
# Long-form aliases for the normalized error functions
normalized_absolute_error = nae
normalized_relative_absolute_error = nrae
mean_normalized_absolute_error = mnae
mean_normalized_relative_absolute_error = mnrae
|
||||||
|
|
|
@ -6,7 +6,7 @@ import quapy as qp
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
from error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
|
from quapy.error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
|
||||||
QUANTIFICATION_ERROR_SINGLE_NAMES
|
QUANTIFICATION_ERROR_SINGLE_NAMES
|
||||||
from quapy.method.aggregative import EMQ, PCC
|
from quapy.method.aggregative import EMQ, PCC
|
||||||
from quapy.method.base import BaseQuantifier
|
from quapy.method.base import BaseQuantifier
|
||||||
|
|
Loading…
Reference in New Issue