fixed evaluation_report and dataframe visualization
This commit is contained in:
parent 5959a0d323
commit 5234ce1387
@@ -1,7 +1,9 @@
-from typing import List, Optional
 import numpy as np
+import quapy as qp
 import scipy.sparse as sp
 from quapy.data import LabelledCollection
+from typing import List, Optional


 class ExtendedCollection(LabelledCollection):
@@ -12,3 +14,17 @@ class ExtendedCollection(LabelledCollection):
         classes: Optional[List] = None,
     ):
         super().__init__(instances, labels, classes=classes)
+
+
+def get_dataset(name):
+    datasets = {
+        "spambase": lambda: qp.datasets.fetch_UCIDataset(
+            "spambase", verbose=False
+        ).train_test,
+        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
+        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
+    }
+
+    try:
+        return datasets[name]()
+    except KeyError:
+        raise KeyError(f"{name} is not available as a dataset")
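
A quick usage sketch for the new get_dataset helper (illustrative, not part of the commit): each dict value is a lambda, so only the requested dataset is actually fetched.

    from quacc.data import get_dataset

    # returns the (train, test) LabelledCollection pair for a known name
    train, test = get_dataset("spambase")
    print(train.n_classes, train.prevalence())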
@@ -1,16 +1,20 @@
+import itertools
 from quapy.protocol import (
     OnLabelledCollectionProtocol,
     AbstractStochasticSeededProtocol,
 )
 import quapy as qp
 from typing import Iterable, Callable, Union

 from .estimator import AccuracyEstimator
 import pandas as pd
 import numpy as np
 import quacc.error as error


-def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededProtocol):
+def estimate(
+    estimator: AccuracyEstimator,
+    protocol: AbstractStochasticSeededProtocol,
+):
     # ensure that the protocol returns a LabelledCollection for each iteration
     protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
@@ -18,6 +22,9 @@ def estimate(estimator: AccuracyEstimator, protocol: AbstractStochasticSeededPro
     for sample in protocol():
         e_sample = estimator.extend(sample)
         estim_prev = estimator.estimate(e_sample.X, ext=True)
         # base_prevs.append(_prettyfloat(accuracy, sample.prevalence()))
         # true_prevs.append(_prettyfloat(accuracy, e_sample.prevalence()))
         # estim_prevs.append(_prettyfloat(accuracy, estim_prev))
+        base_prevs.append(sample.prevalence())
+        true_prevs.append(e_sample.prevalence())
+        estim_prevs.append(estim_prev)
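
For orientation, estimate() walks the protocol's samples and collects three parallel lists. A hedged calling sketch, assuming an already fitted AccuracyEstimator and a test LabelledCollection, with the module imported as in main.py:

    from quapy.protocol import APP
    import quacc.evaluation as eval

    # base_prevs[i]: sample prevalence; true_prevs[i] / estim_prevs[i]:
    # prevalence over the extended (true, predicted) classes
    base_prevs, true_prevs, estim_prevs = eval.estimate(
        estimator, APP(test, n_prevalences=11, repeats=1)
    )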
@@ -25,6 +32,38 @@
     return base_prevs, true_prevs, estim_prevs


+_bprev_col_0 = ["base"]
+_bprev_col_1 = ["0", "1"]
+_prev_col_0 = ["true", "estim"]
+_prev_col_1 = ["T0", "F1", "F0", "T1"]
+_err_col_0 = ["errors"]
+
+
+def _report_columns(err_names):
+    bprev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1))
+    prev_cols = list(itertools.product(_prev_col_0, _prev_col_1))
+
+    err_1 = err_names
+    err_cols = list(itertools.product(_err_col_0, err_1))
+
+    cols = bprev_cols + prev_cols + err_cols
+
+    return pd.MultiIndex.from_tuples(cols)
+
+
+def _dict_prev(base_prev, true_prev, estim_prev):
+    prev_cols = list(itertools.product(_bprev_col_0, _bprev_col_1)) + list(
+        itertools.product(_prev_col_0, _prev_col_1)
+    )
+
+    return {
+        k: v
+        for (k, v) in zip(
+            prev_cols, np.concatenate((base_prev, true_prev, estim_prev), axis=0)
+        )
+    }
+
+
 def evaluation_report(
     estimator: AccuracyEstimator,
     protocol: AbstractStochasticSeededProtocol,
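
The report now uses a two-level pandas column index. A small self-contained sketch of what _report_columns(["mae", "rae"]) evaluates to (the error names here are just examples):

    import itertools
    import pandas as pd

    cols = (
        list(itertools.product(["base"], ["0", "1"]))
        + list(itertools.product(["true", "estim"], ["T0", "F1", "F0", "T1"]))
        + list(itertools.product(["errors"], ["mae", "rae"]))
    )
    idx = pd.MultiIndex.from_tuples(cols)
    # [('base', '0'), ('base', '1'), ('true', 'T0'), ..., ('errors', 'rae')]
    print(idx.tolist())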
@@ -40,26 +79,25 @@
     ]
     assert all(hasattr(e, "__call__") for e in error_funcs), "invalid error function"
     error_names = [e.__name__ for e in error_funcs]
+    error_cols = error_names.copy()
+    if "f1e" in error_cols:
+        error_cols.remove("f1e")
+        error_cols.extend(["f1e_true", "f1e_estim"])

+    # df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
+    df_cols = _report_columns(error_cols)
-    df_cols = ["base_prev", "true_prev", "estim_prev"] + error_names
-    if "f1e" in df_cols:
-        df_cols.remove("f1e")
-        df_cols.extend(["f1e_true", "f1e_estim"])

     lst = []
     for base_prev, true_prev, estim_prev in zip(base_prevs, true_prevs, estim_prevs):
-        series = {
-            "base_prev": base_prev,
-            "true_prev": true_prev,
-            "estim_prev": estim_prev,
-        }
+        series = _dict_prev(base_prev, true_prev, estim_prev)
         for error_name, error_metric in zip(error_names, error_funcs):
             if error_name == "f1e":
-                series["f1e_true"] = error_metric(true_prev)
-                series["f1e_estim"] = error_metric(estim_prev)
+                series[("errors", "f1e_true")] = error_metric(true_prev)
+                series[("errors", "f1e_estim")] = error_metric(estim_prev)
                 continue

             score = error_metric(true_prev, estim_prev)
-            series[error_name] = score
+            series[("errors", error_name)] = score

         lst.append(series)
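
The hunk ends before the report is materialised; presumably the collected row dicts (keyed by the same tuples as the MultiIndex) are then turned into a DataFrame, along the lines of this assumed continuation:

    # assumed tail of evaluation_report (not shown in this hunk)
    df = pd.DataFrame(lst, columns=df_cols)
    return df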
156 quacc/main.py
@@ -1,158 +1,17 @@
 import numpy as np
 import pandas as pd
 import quapy as qp
-import scipy.sparse as sp
-from quapy.data import LabelledCollection
 from quapy.method.aggregative import SLD
-from quapy.protocol import APP, AbstractStochasticSeededProtocol
+from quapy.protocol import APP
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import cross_val_predict

 import quacc.evaluation as eval
 from quacc.estimator import AccuracyEstimator

-qp.environ['SAMPLE_SIZE'] = 100
+from .data import get_dataset
+
+qp.environ["SAMPLE_SIZE"] = 100


-# Extended classes
-#
-# 0 ~ True 0
-# 1 ~ False 1
-# 2 ~ False 0
-# 3 ~ True 1
-#  _____________________
-# |          |          |
-# |  True 0  |  False 1 |
-# |__________|__________|
-# |          |          |
-# |  False 0 |  True 1  |
-# |__________|__________|
-#
-def get_ex_class(classes, true_class, pred_class):
-    return true_class * classes + pred_class
-
-
-def extend_collection(coll, pred_prob):
-    n_classes = coll.n_classes
-
-    # n_X = [ X | predicted probs. ]
-    if isinstance(coll.X, sp.csr_matrix):
-        pred_prob_csr = sp.csr_matrix(pred_prob)
-        n_x = sp.hstack([coll.X, pred_prob_csr])
-    elif isinstance(coll.X, np.ndarray):
-        n_x = np.concatenate((coll.X, pred_prob), axis=1)
-    else:
-        raise ValueError("Unsupported matrix format")
-
-    # n_y = (exptected y, predicted y)
-    n_y = []
-    for i, true_class in enumerate(coll.y):
-        pred_class = pred_prob[i].argmax(axis=0)
-        n_y.append(get_ex_class(n_classes, true_class, pred_class))
-
-    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
-
-
-def qf1e_binary(prev):
-    recall = prev[0] / (prev[0] + prev[1])
-    precision = prev[0] / (prev[0] + prev[2])
-
-    return 1 - 2 * (precision * recall) / (precision + recall)
-
-
-def compute_errors(true_prev, estim_prev, n_instances):
-    errors = {}
-    _eps = 1 / (2 * n_instances)
-    errors = {
-        "mae": qp.error.mae(true_prev, estim_prev),
-        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
-        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
-        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
-        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
-        "true_f1e": qf1e_binary(true_prev),
-        "estim_f1e": qf1e_binary(estim_prev),
-    }
-
-    return errors
-
-
-def extend_and_quantify(
-    model,
-    q_model,
-    train,
-    test: LabelledCollection | AbstractStochasticSeededProtocol,
-):
-    model.fit(*train.Xy)
-
-    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
-    _train = extend_collection(train, pred_prob_train)
-
-    q_model.fit(_train)
-
-    def quantify_extended(test):
-        pred_prob_test = model.predict_proba(test.X)
-        _test = extend_collection(test, pred_prob_test)
-        _estim_prev = q_model.quantify(_test.instances)
-        # check that _estim_prev has all the classes and eventually fill the missing
-        # ones with 0
-        for _cls in _test.classes_:
-            if _cls not in q_model.classes_:
-                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
-        print(_estim_prev)
-        return _test.prevalence(), _estim_prev
-
-    if isinstance(test, LabelledCollection):
-        _true_prev, _estim_prev = quantify_extended(test)
-        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
-        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
-
-    elif isinstance(test, AbstractStochasticSeededProtocol):
-        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
-        for index in test.samples_parameters():
-            sample = test.sample(index)
-            _true_prev, _estim_prev = quantify_extended(sample)
-
-            orig_prevs.append(sample.prevalence())
-            true_prevs.append(_true_prev)
-            estim_prevs.append(_estim_prev)
-            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
-
-        return orig_prevs, true_prevs, estim_prevs, errors
-
-
-def get_dataset(name):
-    datasets = {
-        "spambase": lambda: qp.datasets.fetch_UCIDataset(
-            "spambase", verbose=False
-        ).train_test,
-        "hp": lambda: qp.datasets.fetch_reviews("hp", tfidf=True).train_test,
-        "imdb": lambda: qp.datasets.fetch_reviews("imdb", tfidf=True).train_test,
-    }
-
-    try:
-        return datasets[name]()
-    except KeyError:
-        raise KeyError(f"{name} is not available as a dataset")
-
-
-def test_1(dataset_name):
-    train, test = get_dataset(dataset_name)
-
-    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
-        LogisticRegression(),
-        SLD(LogisticRegression()),
-        train,
-        APP(test, n_prevalences=11, repeats=1),
-    )
-
-    for orig_prev, true_prev, estim_prev, _errors in zip(
-        orig_prevs, true_prevs, estim_prevs, errors
-    ):
-        print(f"original prevalence:\t{orig_prev}")
-        print(f"true prevalence:\t{true_prev}")
-        print(f"estimated prevalence:\t{estim_prev}")
-        for name, err in _errors.items():
-            print(f"{name}={err:.3f}")
-        print()
+pd.set_option("display.float_format", "{:.4f}".format)


 def test_2(dataset_name):
@@ -161,9 +20,8 @@ def test_2(dataset_name):
     model.fit(*train.Xy)
     estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
     estimator.fit(train)
-    df = eval.evaluation_report(
-        estimator, APP(test, n_prevalences=11, repeats=1)
-    )
-    # print(df.to_string())
+    df = eval.evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
+    print(df.to_string())
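
Putting the pieces together, the slimmed-down main.py now runs test_2 roughly as follows; this sketch is assembled from the lines above (the dataset name is just an example):

    import quapy as qp
    from quapy.method.aggregative import SLD
    from quapy.protocol import APP
    from sklearn.linear_model import LogisticRegression

    import quacc.evaluation as eval
    from quacc.data import get_dataset
    from quacc.estimator import AccuracyEstimator

    qp.environ["SAMPLE_SIZE"] = 100

    train, test = get_dataset("imdb")
    model = LogisticRegression()
    model.fit(*train.Xy)
    estimator = AccuracyEstimator(model, SLD(LogisticRegression()))
    estimator.fit(train)
    df = eval.evaluation_report(estimator, APP(test, n_prevalences=11, repeats=100))
    print(df.to_string())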
@@ -0,0 +1,138 @@
+import numpy as np
+import scipy.sparse as sp
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.method.aggregative import SLD
+from quapy.protocol import APP, AbstractStochasticSeededProtocol
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import cross_val_predict
+
+from .data import get_dataset
+
+
+# Extended classes
+#
+# 0 ~ True 0
+# 1 ~ False 1
+# 2 ~ False 0
+# 3 ~ True 1
+#  _____________________
+# |          |          |
+# |  True 0  |  False 1 |
+# |__________|__________|
+# |          |          |
+# |  False 0 |  True 1  |
+# |__________|__________|
+#
+def get_ex_class(classes, true_class, pred_class):
+    return true_class * classes + pred_class
+
+
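
The comment block above encodes the extended-class scheme: the class id is true_class * n_classes + pred_class. A tiny check of the mapping for the binary case (illustrative only):

    # (true=0, pred=0) -> 0  "True 0"
    # (true=0, pred=1) -> 1  "False 1"
    # (true=1, pred=0) -> 2  "False 0"
    # (true=1, pred=1) -> 3  "True 1"
    for t in (0, 1):
        for p in (0, 1):
            print(t, p, "->", get_ex_class(2, t, p))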
+def extend_collection(coll, pred_prob):
+    n_classes = coll.n_classes
+
+    # n_X = [ X | predicted probs. ]
+    if isinstance(coll.X, sp.csr_matrix):
+        pred_prob_csr = sp.csr_matrix(pred_prob)
+        n_x = sp.hstack([coll.X, pred_prob_csr])
+    elif isinstance(coll.X, np.ndarray):
+        n_x = np.concatenate((coll.X, pred_prob), axis=1)
+    else:
+        raise ValueError("Unsupported matrix format")
+
+    # n_y = (expected y, predicted y)
+    n_y = []
+    for i, true_class in enumerate(coll.y):
+        pred_class = pred_prob[i].argmax(axis=0)
+        n_y.append(get_ex_class(n_classes, true_class, pred_class))
+
+    return LabelledCollection(n_x, np.asarray(n_y), [*range(0, n_classes * n_classes)])
+
+
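
extend_collection appends the classifier's posterior probabilities as extra feature columns and relabels each instance with its extended class. A minimal dense-matrix sketch with toy numbers:

    import numpy as np
    from quapy.data import LabelledCollection

    X = np.array([[1.0, 0.0], [0.0, 1.0]])   # two instances, two features
    y = np.array([0, 1])                      # true labels
    pred_prob = np.array([[0.9, 0.1],         # classifier says 0 (correct)
                          [0.7, 0.3]])        # classifier says 0 (wrong)
    # resulting X has 4 columns; labels become 0 ("True 0") and 2 ("False 0")
    ext = extend_collection(LabelledCollection(X, y), pred_prob)
    print(ext.X.shape, ext.y)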
+def qf1e_binary(prev):
+    recall = prev[0] / (prev[0] + prev[1])
+    precision = prev[0] / (prev[0] + prev[2])
+
+    return 1 - 2 * (precision * recall) / (precision + recall)
+
+
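
In the extended-prevalence layout [T0, F1, F0, T1], qf1e_binary treats class 0 as the positive class: recall = T0/(T0+F1), precision = T0/(T0+F0), and the function returns 1 minus the F1 score. A worked example with made-up prevalence values:

    prev = [0.4, 0.1, 0.1, 0.4]      # T0, F1, F0, T1
    # recall = 0.4/0.5 = 0.8; precision = 0.4/0.5 = 0.8
    # F1 = 2*0.8*0.8/1.6 = 0.8, so qf1e_binary(prev) == 0.2
    print(qf1e_binary(prev))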
+def compute_errors(true_prev, estim_prev, n_instances):
+    _eps = 1 / (2 * n_instances)
+    errors = {
+        "mae": qp.error.mae(true_prev, estim_prev),
+        "rae": qp.error.rae(true_prev, estim_prev, eps=_eps),
+        "mrae": qp.error.mrae(true_prev, estim_prev, eps=_eps),
+        "kld": qp.error.kld(true_prev, estim_prev, eps=_eps),
+        "nkld": qp.error.nkld(true_prev, estim_prev, eps=_eps),
+        "true_f1e": qf1e_binary(true_prev),
+        "estim_f1e": qf1e_binary(estim_prev),
+    }
+
+    return errors
+
+
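
The smoothing constant eps = 1/(2 * n_instances) follows quapy's usual convention for the relative and KL-based measures, which are undefined at zero prevalence values; mae needs no smoothing. A quick illustration with toy prevalences:

    import numpy as np

    true_prev = np.array([0.5, 0.2, 0.1, 0.2])
    estim_prev = np.array([0.45, 0.25, 0.1, 0.2])
    errs = compute_errors(true_prev, estim_prev, n_instances=100)
    print({k: round(float(v), 4) for k, v in errs.items()})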
+def extend_and_quantify(
+    model,
+    q_model,
+    train,
+    test: LabelledCollection | AbstractStochasticSeededProtocol,
+):
+    model.fit(*train.Xy)
+
+    pred_prob_train = cross_val_predict(model, *train.Xy, method="predict_proba")
+    _train = extend_collection(train, pred_prob_train)
+
+    q_model.fit(_train)
+
+    def quantify_extended(test):
+        pred_prob_test = model.predict_proba(test.X)
+        _test = extend_collection(test, pred_prob_test)
+        _estim_prev = q_model.quantify(_test.instances)
+        # check that _estim_prev covers all the extended classes and, if any
+        # are missing, fill them with 0
+        for _cls in _test.classes_:
+            if _cls not in q_model.classes_:
+                _estim_prev = np.insert(_estim_prev, _cls, [0.0], axis=0)
+        print(_estim_prev)
+        return _test.prevalence(), _estim_prev
+
+    if isinstance(test, LabelledCollection):
+        _true_prev, _estim_prev = quantify_extended(test)
+        _errors = compute_errors(_true_prev, _estim_prev, test.X.shape[0])
+        return ([test.prevalence()], [_true_prev], [_estim_prev], [_errors])
+
+    elif isinstance(test, AbstractStochasticSeededProtocol):
+        orig_prevs, true_prevs, estim_prevs, errors = [], [], [], []
+        for index in test.samples_parameters():
+            sample = test.sample(index)
+            _true_prev, _estim_prev = quantify_extended(sample)
+
+            orig_prevs.append(sample.prevalence())
+            true_prevs.append(_true_prev)
+            estim_prevs.append(_estim_prev)
+            errors.append(compute_errors(_true_prev, _estim_prev, sample.X.shape[0]))
+
+        return orig_prevs, true_prevs, estim_prevs, errors
+
+
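
The padding loop matters when the quantifier never saw some extended class at training time; np.insert keeps the prevalence vector aligned with the full class list. A standalone illustration with toy values:

    import numpy as np

    estim_prev = np.array([0.7, 0.2, 0.1])   # quantifier knows classes 0, 1, 3
    q_classes = [0, 1, 3]
    for cls in range(4):                      # full extended class list
        if cls not in q_classes:
            estim_prev = np.insert(estim_prev, cls, 0.0)
    print(estim_prev)                         # [0.7 0.2 0.  0.1]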
+def test_1(dataset_name):
+    train, test = get_dataset(dataset_name)
+
+    orig_prevs, true_prevs, estim_prevs, errors = extend_and_quantify(
+        LogisticRegression(),
+        SLD(LogisticRegression()),
+        train,
+        APP(test, n_prevalences=11, repeats=1),
+    )
+
+    for orig_prev, true_prev, estim_prev, _errors in zip(
+        orig_prevs, true_prevs, estim_prevs, errors
+    ):
+        print(f"original prevalence:\t{orig_prev}")
+        print(f"true prevalence:\t{true_prev}")
+        print(f"estimated prevalence:\t{estim_prev}")
+        for name, err in _errors.items():
+            print(f"{name}={err:.3f}")
+        print()