fixed evaluation_report and dataframe visualization

This commit is contained in:
Lorenzo Volpi 2023-06-02 19:36:54 +02:00
parent 5959a0d323
commit 5234ce1387
5 changed files with 2451 additions and 164 deletions

out.html (new file, 2237 lines)

File diff suppressed because it is too large.

@@ -1,7 +1,9 @@
from typing import List, Optional

import numpy as np
import quapy as qp
import scipy.sparse as sp
from quapy.data import LabelledCollection
from typing import List, Optional


class ExtendedCollection(LabelledCollection):

@@ -12,3 +14,17 @@ class ExtendedCollection(LabelledCollection):
        classes: Optional[List] = None,
    ):
        super().__init__(instances, labels, classes=classes)


def get_dataset(name):
    datasets = {
        "spambase": lambda: qp.datasets.fetch_UCIDataset(
            "spambase", verbose=False
        ).train_test,
        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
    }
    try:
        return datasets[name]()
    except KeyError:
        raise KeyError(f"{name} is not available as a dataset")
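# Illustrative usage sketch (not part of the diff): assumes this module is the
# .data module imported elsewhere in quacc, and that quapy can fetch the data.
from quacc.data import get_dataset

train, test = get_dataset("spambase")  # a (train, test) pair of LabelledCollections
print(train.n_classes, train.prevalence())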


@@ -1,16 +1,20 @@
import itertools

from quapy.protocol import (
    OnLabelledCollectionProtocol,
    AbstractStochasticSeededProtocol,
)
import quapy as qp
from typing import Iterable, Callable, Union
from .estimator import AccuracyEstimator
import pandas as pd
import numpy as np
import quacc.error as error


def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededProtocol):
def estimate(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,
):
    # ensure that the protocol returns a LabelledCollection for each iteration
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
@@ -18,6 +22,9 @@ def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededPro
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        # base_prevs.append(_prettyfloat(accuracy, sample.prevalence()))
        # true_prevs.append(_prettyfloat(accuracy, e_sample.prevalence()))
        # estim_prevs.append(_prettyfloat(accuracy, estim_prev))
        base_prevs.append(sample.prevalence())
        true_prevs.append(e_sample.prevalence())
        estim_prevs.append(estim_prev)

@@ -25,6 +32,38 @@ def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededPro
    return base_prevs, true_prevs, estim_prevs
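# Illustrative sketch (not part of the diff): driving estimate() end to end.
# model, train and test are assumed to exist, mirroring test_2 below.
from quapy.method.aggregative import SLD
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
estimator.fit(train)
base_prevs, true_prevs, estim_prevs = estimate(
    estimator, APP(test, n_prevalences=11, repeats=1)
)
# one (base, true, estimated) prevalence triple per sample drawn by the protocol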
_bprev_col_0 = ["base"]
_bprev_col_1 = ["0", "1"]
_prev_col_0 = ["true", "estim"]
_prev_col_1 = ["T0", "F1", "F0", "T1"]
_err_col_0 = ["errors"]


def _report_columns(err_names):
    bprev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1))
    prev_cols = list(itertools.product(_prev_col_0, _prev_col_1))
    err_1 = err_names
    err_cols = list(itertools.product(_err_col_0, err_1))
    cols = bprev_cols + prev_cols + err_cols
    return pd.MultiIndex.from_tuples(cols)


def _dict_prev(base_prev, true_prev, estim_prev):
    prev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1)) + list(
        itertools.product(_prev_col_0, _prev_col_1)
    )
    return {
        k: v
        for (k, v) in zip(
            prev_cols, np.concatenate((base_prev, true_prev, estim_prev), axis=0)
        )
    }
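# Illustrative sketch (not part of the diff): the report layout the helpers
# above produce for two error metrics.
cols = _report_columns(["mae", "kld"])
# -> MultiIndex over a two-row header:
#    ('base', '0'), ('base', '1'),
#    ('true', 'T0'), ('true', 'F1'), ('true', 'F0'), ('true', 'T1'),
#    ('estim', 'T0'), ('estim', 'F1'), ('estim', 'F0'), ('estim', 'T1'),
#    ('errors', 'mae'), ('errors', 'kld')
# _dict_prev(base_prev, true_prev, estim_prev) keys the concatenated prevalence
# values by the ('base', ...), ('true', ...) and ('estim', ...) tuples above.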
def evaluation_report(
    estimator: AccuracyEstimator,
    protocol: AbstractStochasticSeededProtocol,

@@ -40,26 +79,25 @@ def evaluation_report(
    ]
    assert all(hasattr(e, "__call__") for e in error_funcs), "invalid error function"
    error_names = [e.__name__ for e in error_funcs]

    error_cols = error_names.copy()
    if "f1e" in error_cols:
        error_cols.remove("f1e")
        error_cols.extend(["f1e_true", "f1e_estim"])
    # df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
    df_cols = _report_columns(error_cols)
    df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
    if "f1e" in df_cols:
        df_cols.remove("f1e")
        df_cols.extend(["f1e_true", "f1e_estim"])

    lst = []
    for base_prev, true_prev, estim_prev in zip(base_prevs, true_prevs, estim_prevs):
        series = {
            "base_prev": base_prev,
            "true_prev": true_prev,
            "estim_prev": estim_prev,
        }
        series = _dict_prev(base_prev, true_prev, estim_prev)
        for error_name, error_metric in zip(error_names, error_funcs):
            if error_name == "f1e":
                series["f1e_true"] = error_metric(true_prev)
                series["f1e_estim"] = error_metric(estim_prev)
                series[("errors", "f1e_true")] = error_metric(true_prev)
                series[("errors", "f1e_estim")] = error_metric(estim_prev)
                continue
            score = error_metric(true_prev, estim_prev)
            series[error_name] = score
            series[("errors", error_name)] = score
        lst.append(series)
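# Sketch of the intended call site (mirrors test_2 below). The tail of
# evaluation_report falls outside this hunk; presumably it assembles the frame as
#     return pd.DataFrame(lst, columns=df_cols)
df = evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
print(df.to_string())  # grouped two-row header: base | true | estim | errors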


@@ -1,158 +1,17 @@
import numpy as np
import pandas as pd
import quapy as qp
import scipy.sparse as sp
from quapy.data import LabelledCollection
from quapy.method.aggregative import SLD
from quapy.protocol import APP, AbstractStochasticSeededProtocol
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

import quacc.evaluation as eval
from quacc.estimator import AccuracyEstimator

qp.environ['SAMPLE_SIZE'] = 100
from .data import get_dataset

qp.environ["SAMPLE_SIZE"] = 100

# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  |  False 1 |
# |__________|__________|
# |          |          |
# |  False 0 |  True 1  |
# |__________|__________|
#


def get_ex_class(classes, true_class, pred_class):
    return true_class * classes + pred_class
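# Worked example of the mapping above in the binary case (classes == 2):
#   get_ex_class(2, 0, 0) -> 0  ("True 0":  true 0, predicted 0)
#   get_ex_class(2, 0, 1) -> 1  ("False 1": true 0, predicted 1)
#   get_ex_class(2, 1, 0) -> 2  ("False 0": true 1, predicted 0)
#   get_ex_class(2, 1, 1) -> 3  ("True 1":  true 1, predicted 1)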
def extend_collection(coll, pred_prob):
    n_classes = coll.n_classes

    # n_X = [ X | predicted probs. ]
    if isinstance(coll.X, sp.csr_matrix):
        pred_prob_csr = sp.csr_matrix(pred_prob)
        n_x = sp.hstack([coll.X, pred_prob_csr])
    elif isinstance(coll.X, np.ndarray):
        n_x = np.concatenate((coll.X, pred_prob), axis=1)
    else:
        raise ValueError("Unsupported matrix format")

    # n_y = (expected y, predicted y)
    n_y = []
    for i, true_class in enumerate(coll.y):
        pred_class = pred_prob[i].argmax(axis=0)
        n_y.append(get_ex_class(n_classes, true_class, pred_class))

    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
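# Small worked example (hypothetical data, not part of the diff; dense case):
X = np.asarray([[0.1, 0.2], [0.3, 0.4]])
y = np.asarray([0, 1])
pred_prob = np.asarray([[0.9, 0.1], [0.2, 0.8]])  # classifier posteriors
ext = extend_collection(LabelledCollection(X, y, classes=[0, 1]), pred_prob)
# ext.X -> [[0.1, 0.2, 0.9, 0.1], [0.3, 0.4, 0.2, 0.8]]
# ext.y -> [0, 3]  (both predictions correct: "True 0" and "True 1")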
def qf1e_binary(prev):
    recall = prev[0] / (prev[0] + prev[1])
    precision = prev[0] / (prev[0] + prev[2])
    return 1 - 2 * (precision * recall) / (precision + recall)
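# Reading prev as [T0, F1, F0, T1], this is 1 minus the F1 score of class 0.
# For instance, with prev = [0.4, 0.1, 0.1, 0.4]:
#   recall    = 0.4 / (0.4 + 0.1) = 0.8
#   precision = 0.4 / (0.4 + 0.1) = 0.8
#   F1 = 2 * (0.8 * 0.8) / (0.8 + 0.8) = 0.8, so qf1e_binary(prev) ≈ 0.2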
def compute_errors(true_prev, estim_prev, n_instances):
    errors = {}
    _eps = 1 / (2 * n_instances)
    errors = {
        "mae": qp.error.mae(true_prev, estim_prev),
        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
        "true_f1e": qf1e_binary(true_prev),
        "estim_f1e": qf1e_binary(estim_prev),
    }
    return errors
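# Sanity check (illustrative, not part of the diff): a perfect estimate drives
# the pairwise metrics to zero; _eps is quapy's customary 1/(2*sample_size)
# smoothing term for the relative and KLD-based measures.
p = np.asarray([0.4, 0.1, 0.1, 0.4])
errs = compute_errors(p, p, n_instances=100)
# errs["mae"] == 0.0 and errs["true_f1e"] == errs["estim_f1e"]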
def extend_and_quantify(
    model,
    q_model,
    train,
    test: LabelledCollection | AbstractStochasticSeededProtocol,
):
    model.fit(*train.Xy)

    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
    _train = extend_collection(train, pred_prob_train)
    q_model.fit(_train)

    def quantify_extended(test):
        pred_prob_test = model.predict_proba(test.X)
        _test = extend_collection(test, pred_prob_test)
        _estim_prev = q_model.quantify(_test.instances)
        # check that _estim_prev has all the classes and fill any missing
        # ones with 0
        for _cls in _test.classes_:
            if _cls not in q_model.classes_:
                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
        print(_estim_prev)
        return _test.prevalence(), _estim_prev

    if isinstance(test, LabelledCollection):
        _true_prev, _estim_prev = quantify_extended(test)
        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
    elif isinstance(test, AbstractStochasticSeededProtocol):
        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
        for index in test.samples_parameters():
            sample = test.sample(index)
            _true_prev, _estim_prev = quantify_extended(sample)
            orig_prevs.append(sample.prevalence())
            true_prevs.append(_true_prev)
            estim_prevs.append(_estim_prev)
            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
        return orig_prevs, true_prevs, estim_prevs, errors
def get_dataset(name):
    datasets = {
        "spambase": lambda: qp.datasets.fetch_UCIDataset(
            "spambase", verbose=False
        ).train_test,
        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
    }
    try:
        return datasets[name]()
    except KeyError:
        raise KeyError(f"{name} is not available as a dataset")
def test_1(dataset_name):
    train, test = get_dataset(dataset_name)

    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
        LogisticRegression(),
        SLD(LogisticRegression()),
        train,
        APP(test, n_prevalences=11, repeats=1),
    )

    for orig_prev, true_prev, estim_prev, _errors in zip(
        orig_prevs, true_prevs, estim_prevs, errors
    ):
        print(f"original prevalence:\t{orig_prev}")
        print(f"true prevalence:\t{true_prev}")
        print(f"estimated prevalence:\t{estim_prev}")
        for name, err in _errors.items():
            print(f"{name}={err:.3f}")
        print()


pd.set_option("display.float_format", "{:.4f}".format)


def test_2(dataset_name):
@@ -161,9 +20,8 @@ def test_2(dataset_name):
    model.fit(*train.Xy)

    estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
    estimator.fit(train)
    df = eval.evaluation_report(
        estimator, APP(test, n_prevalences=11, repeats=1)
    )
    df = eval.evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
    # print(df.to_string())
    print(df.to_string())

quacc/test_1.py (new file, 138 lines)

@@ -0,0 +1,138 @@
import numpy as np
import scipy.sparse as sp
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import SLD
from quapy.protocol import APP, AbstractStochasticSeededProtocol
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

from .data import get_dataset

# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  |  False 1 |
# |__________|__________|
# |          |          |
# |  False 0 |  True 1  |
# |__________|__________|
#


def get_ex_class(classes, true_class, pred_class):
    return true_class * classes + pred_class
def extend_collection(coll, pred_prob):
    n_classes = coll.n_classes

    # n_X = [ X | predicted probs. ]
    if isinstance(coll.X, sp.csr_matrix):
        pred_prob_csr = sp.csr_matrix(pred_prob)
        n_x = sp.hstack([coll.X, pred_prob_csr])
    elif isinstance(coll.X, np.ndarray):
        n_x = np.concatenate((coll.X, pred_prob), axis=1)
    else:
        raise ValueError("Unsupported matrix format")

    # n_y = (expected y, predicted y)
    n_y = []
    for i, true_class in enumerate(coll.y):
        pred_class = pred_prob[i].argmax(axis=0)
        n_y.append(get_ex_class(n_classes, true_class, pred_class))

    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
def qf1e_binary(prev):
    recall = prev[0] / (prev[0] + prev[1])
    precision = prev[0] / (prev[0] + prev[2])
    return 1 - 2 * (precision * recall) / (precision + recall)
def compute_errors(true_prev, estim_prev, n_instances):
    errors = {}
    _eps = 1 / (2 * n_instances)
    errors = {
        "mae": qp.error.mae(true_prev, estim_prev),
        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
        "true_f1e": qf1e_binary(true_prev),
        "estim_f1e": qf1e_binary(estim_prev),
    }
    return errors
def extend_and_quantify(
    model,
    q_model,
    train,
    test: LabelledCollection | AbstractStochasticSeededProtocol,
):
    model.fit(*train.Xy)

    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
    _train = extend_collection(train, pred_prob_train)
    q_model.fit(_train)

    def quantify_extended(test):
        pred_prob_test = model.predict_proba(test.X)
        _test = extend_collection(test, pred_prob_test)
        _estim_prev = q_model.quantify(_test.instances)
        # check that _estim_prev has all the classes and fill any missing
        # ones with 0
        for _cls in _test.classes_:
            if _cls not in q_model.classes_:
                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
        print(_estim_prev)
        return _test.prevalence(), _estim_prev

    if isinstance(test, LabelledCollection):
        _true_prev, _estim_prev = quantify_extended(test)
        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
    elif isinstance(test, AbstractStochasticSeededProtocol):
        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
        for index in test.samples_parameters():
            sample = test.sample(index)
            _true_prev, _estim_prev = quantify_extended(sample)
            orig_prevs.append(sample.prevalence())
            true_prevs.append(_true_prev)
            estim_prevs.append(_estim_prev)
            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
        return orig_prevs, true_prevs, estim_prevs, errors
def test_1(dataset_name):
    train, test = get_dataset(dataset_name)

    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
        LogisticRegression(),
        SLD(LogisticRegression()),
        train,
        APP(test, n_prevalences=11, repeats=1),
    )

    for orig_prev, true_prev, estim_prev, _errors in zip(
        orig_prevs, true_prevs, estim_prevs, errors
    ):
        print(f"original prevalence:\t{orig_prev}")
        print(f"true prevalence:\t{true_prev}")
        print(f"estimated prevalence:\t{estim_prev}")
        for name, err in _errors.items():
            print(f"{name}={err:.3f}")
        print()