# QuAcc/quacc/evaluation/method.py
#
# 410 lines
# 21 KiB
# Python

from dataclasses import dataclass
from typing import Callable, List, Union
import numpy as np
from matplotlib.pylab import rand
from quapy.method.aggregative import PACC, SLD, BaseQuantifier
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
import quacc as qc
from quacc.environment import env
from quacc.evaluation.report import EvaluationReport
from quacc.method.base import BQAE, MCAE, BaseAccuracyEstimator
from quacc.method.model_selection import (
GridSearchAE,
SpiderSearchAE,
)
from quacc.quantification import KDEy
def _param_grid(method, X_fit: np.ndarray):
    """Build the hyper-parameter search grid registered under ``method``.

    Recognised keys are ``"sld_lr"``, ``"sld_rbf"``, ``"pacc"``, ``"kde_lr"``
    and ``"kde_rbf"``. Any other key yields ``None``.

    Args:
        method: key selecting the grid.
        X_fit: feature matrix; only read for the ``*_rbf`` grids, where its
            column count and overall variance scale the gamma candidates.

    Returns:
        A dict mapping parameter names to candidate values, or ``None`` when
        ``method`` is not a recognised key.
    """
    uses_rbf = method in ("sld_rbf", "kde_rbf")
    is_sld = method in ("sld_lr", "sld_rbf")
    is_kde = method in ("kde_lr", "kde_rbf")
    if not (is_sld or is_kde or method == "pacc"):
        return None

    # Entries are inserted in the same order as the per-method literals they
    # replace, so iteration order over the grid is unchanged.
    grid = {
        "q__classifier__C": np.logspace(-3, 3, 7),
        "q__classifier__class_weight": [None, "balanced"],
    }
    if uses_rbf:
        gamma_scale = 1.0 / (X_fit.shape[1] * X_fit.var())
        grid["q__classifier__gamma"] = gamma_scale * np.logspace(-2, 2, 5)
    if is_sld:
        grid["q__recalib"] = [None, "bcts"]
    if is_kde:
        grid["q__bandwidth"] = np.linspace(0.01, 0.2, 20)
        grid["confidence"] = [None, ["isoft"]]
    else:
        grid["confidence"] = [None, ["isoft"], ["max_conf", "entropy"]]
    return grid
def evaluation_report(
    estimator: BaseAccuracyEstimator, protocol: AbstractProtocol, method_name=None
) -> EvaluationReport:
    """Run ``estimator`` over every sample of ``protocol`` and collect a report.

    For each sample the estimated and the true prevalences are turned into
    accuracy scores (and F1 scores, when the estimate supports them); the
    estimated score and its absolute difference from the true score are
    stored as one report row keyed by the sample prevalence.

    A sample that raises anywhere in this pipeline is logged to stdout and
    recorded as a row of NaNs, so one bad sample does not abort the run.

    Args:
        estimator: fitted accuracy estimator.
        protocol: callable yielding the samples to evaluate.
        method_name: label given to the report and used in failure messages.

    Returns:
        The populated ``EvaluationReport``.
    """
    report = EvaluationReport(name=method_name)
    for sample in protocol():
        try:
            extended = estimator.extend(sample)
            estimated = estimator.estimate(extended.eX)
            actual = extended.e_prevalence()

            estimated_acc = qc.error.acc(estimated)
            metrics = {
                "acc_score": estimated_acc,
                "acc": abs(qc.error.acc(actual) - estimated_acc),
            }
            if estimated.can_f1():
                estimated_f1 = qc.error.f1(estimated)
                metrics["f1_score"] = estimated_f1
                metrics["f1"] = abs(qc.error.f1(actual) - estimated_f1)

            report.append_row(sample.prevalence(), **metrics)
        except Exception as e:
            print(f"sample prediction failed for method {method_name}: {e}")
            # Keep the report aligned with the protocol: one row per sample.
            missing = dict.fromkeys(("acc_score", "acc", "f1_score", "f1"), np.nan)
            report.append_row(sample.prevalence(), **missing)
    return report
@dataclass(frozen=True)
class EvaluationMethod:
    """Immutable, named recipe that builds, fits and evaluates an estimator.

    Calling an instance builds the estimator selected by ``est_n`` around the
    given classifier, fits it on the validation data and returns the
    evaluation report produced over ``protocol``.
    """

    name: str  # label of the method; becomes the report name
    q: BaseQuantifier  # quantifier handed to the estimator
    est_n: str  # estimator kind: "mul" -> MCAE, "bin" -> BQAE
    conf: List[str] | str = None  # forwarded as the estimator's `confidence`
    cf: bool = False  # collapse_false
    gf: bool = False  # group_false
    d: bool = False  # dense

    def get_est(self, c_model):
        """Instantiate the (unfitted) estimator for ``c_model``.

        Returns ``None`` when ``est_n`` is neither ``"mul"`` nor ``"bin"``.
        """
        kind = self.est_n
        if kind == "mul":
            return MCAE(
                c_model,
                self.q,
                confidence=self.conf,
                collapse_false=self.cf,
                group_false=self.gf,
                dense=self.d,
            )
        if kind == "bin":
            # BQAE is not given a `collapse_false` option, so `cf` is unused here.
            return BQAE(
                c_model,
                self.q,
                confidence=self.conf,
                group_false=self.gf,
                dense=self.d,
            )
        return None

    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
        """Fit the estimator on ``validation`` and evaluate it over ``protocol``."""
        fitted = self.get_est(c_model).fit(validation)
        return evaluation_report(
            estimator=fitted,
            protocol=protocol,
            method_name=self.name,
        )
@dataclass(frozen=True)
class EvaluationMethodGridSearch(EvaluationMethod):
pg: str = "sld"
search: str = "grid"
def get_search(self):
match self.search:
case "grid":
return (GridSearchAE, {})
case "spider" | "spider2":
return (SpiderSearchAE, dict(best_width=2))
case "spider3":
return (SpiderSearchAE, dict(best_width=3))
case _:
return GridSearchAE
def __call__(self, c_model, validation, protocol) -> EvaluationReport:
v_train, v_val = validation.split_stratified(0.6, random_state=env._R_SEED)
_model = self.get_est(c_model)
_grid = _param_grid(self.pg, X_fit=_model.extend(v_train, prefit=True).X)
_search_class, _search_params = self.get_search()
est = _search_class(
model=_model,
param_grid=_grid,
refit=False,
protocol=UPP(v_val, repeats=100),
verbose=False,
**_search_params,
).fit(v_train)
return evaluation_report(
estimator=est,
protocol=protocol,
method_name=self.name,
)
# Short aliases that keep the rows of the method tables below compact:
# M is a plain evaluation method, G is its grid-search variant.
M = EvaluationMethod
G = EvaluationMethodGridSearch
def __sld_lr():
    """Return a new SLD quantifier wrapping a default LogisticRegression."""
    classifier = LogisticRegression()
    return SLD(classifier)
def __sld_rbf():
    """Return a new SLD quantifier wrapping an RBF-kernel SVC with probabilities on."""
    classifier = SVC(kernel="rbf", probability=True)
    return SLD(classifier)
def __kde_lr():
    """Return a new KDEy quantifier on a default LogisticRegression, seeded from env."""
    classifier = LogisticRegression()
    return KDEy(classifier, random_state=env._R_SEED)
def __kde_rbf():
    """Return a new KDEy quantifier on an RBF-kernel SVC with probabilities on, seeded from env."""
    classifier = SVC(kernel="rbf", probability=True)
    return KDEy(classifier, random_state=env._R_SEED)
def __sld_lsvc():
    """Return a new SLD quantifier wrapping a default LinearSVC."""
    classifier = LinearSVC()
    return SLD(classifier)
def __pacc_lr():
    """Return a new PACC quantifier wrapping a default LogisticRegression."""
    classifier = LogisticRegression()
    return PACC(classifier)
# fmt: off
# SLD + LogisticRegression methods. Every row calls `__sld_lr()` so each
# method gets its own quantifier instance.
# Name prefixes: bin = "bin" estimator, bgf = "bin" with gf=True,
#                mul = "mul" estimator, m3w = "mul" with cf=True,
#                mgf = "mul" with gf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = grid-search variant (G) using the "sld_lr" grid.
__sld_lr_set = [
M("bin_sld_lr", __sld_lr(), "bin" ),
M("bgf_sld_lr", __sld_lr(), "bin", gf=True),
M("mul_sld_lr", __sld_lr(), "mul" ),
M("m3w_sld_lr", __sld_lr(), "mul", cf=True),
M("mgf_sld_lr", __sld_lr(), "mul", gf=True),
# max_conf + entropy sld
M("bin_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"] ),
M("bgf_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"], gf=True),
M("mul_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"] ),
M("m3w_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
M("mgf_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], gf=True),
# max_conf sld
M("bin_sld_lr_mc", __sld_lr(), "bin", conf="max_conf", ),
M("bgf_sld_lr_mc", __sld_lr(), "bin", conf="max_conf", gf=True),
M("mul_sld_lr_mc", __sld_lr(), "mul", conf="max_conf", ),
M("m3w_sld_lr_mc", __sld_lr(), "mul", conf="max_conf", cf=True),
M("mgf_sld_lr_mc", __sld_lr(), "mul", conf="max_conf", gf=True),
# entropy sld
M("bin_sld_lr_ne", __sld_lr(), "bin", conf="entropy", ),
M("bgf_sld_lr_ne", __sld_lr(), "bin", conf="entropy", gf=True),
M("mul_sld_lr_ne", __sld_lr(), "mul", conf="entropy", ),
M("m3w_sld_lr_ne", __sld_lr(), "mul", conf="entropy", cf=True),
M("mgf_sld_lr_ne", __sld_lr(), "mul", conf="entropy", gf=True),
# inverse softmax sld
M("bin_sld_lr_is", __sld_lr(), "bin", conf="isoft", ),
M("bgf_sld_lr_is", __sld_lr(), "bin", conf="isoft", gf=True),
M("mul_sld_lr_is", __sld_lr(), "mul", conf="isoft", ),
M("m3w_sld_lr_is", __sld_lr(), "mul", conf="isoft", cf=True),
M("mgf_sld_lr_is", __sld_lr(), "mul", conf="isoft", gf=True),
# grid-search sld (default "grid" search strategy)
G("bin_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr" ),
G("bgf_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr", gf=True),
G("mul_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr" ),
G("m3w_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", cf=True),
G("mgf_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", gf=True),
]
# Dense (d=True) SLD + LogisticRegression methods; names carry a "d_" prefix.
# Every row calls `__sld_lr()` so each method gets its own quantifier instance.
# Name prefixes after "d_": bin = "bin" estimator, bgf = "bin" with gf=True,
#                mul = "mul" estimator, m3w = "mul" with cf=True,
#                mgf = "mul" with gf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = grid-search variant (G) using the "sld_lr" grid.
__dense_sld_lr_set = [
M("d_bin_sld_lr", __sld_lr(), "bin", d=True, ),
M("d_bgf_sld_lr", __sld_lr(), "bin", d=True, gf=True),
M("d_mul_sld_lr", __sld_lr(), "mul", d=True, ),
M("d_m3w_sld_lr", __sld_lr(), "mul", d=True, cf=True),
M("d_mgf_sld_lr", __sld_lr(), "mul", d=True, gf=True),
# max_conf + entropy sld
M("d_bin_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"] ),
M("d_bgf_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"], gf=True),
M("d_mul_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"] ),
M("d_m3w_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True),
M("d_mgf_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], gf=True),
# max_conf sld
M("d_bin_sld_lr_mc", __sld_lr(), "bin", d=True, conf="max_conf", ),
M("d_bgf_sld_lr_mc", __sld_lr(), "bin", d=True, conf="max_conf", gf=True),
M("d_mul_sld_lr_mc", __sld_lr(), "mul", d=True, conf="max_conf", ),
M("d_m3w_sld_lr_mc", __sld_lr(), "mul", d=True, conf="max_conf", cf=True),
M("d_mgf_sld_lr_mc", __sld_lr(), "mul", d=True, conf="max_conf", gf=True),
# entropy sld
M("d_bin_sld_lr_ne", __sld_lr(), "bin", d=True, conf="entropy", ),
M("d_bgf_sld_lr_ne", __sld_lr(), "bin", d=True, conf="entropy", gf=True),
M("d_mul_sld_lr_ne", __sld_lr(), "mul", d=True, conf="entropy", ),
M("d_m3w_sld_lr_ne", __sld_lr(), "mul", d=True, conf="entropy", cf=True),
M("d_mgf_sld_lr_ne", __sld_lr(), "mul", d=True, conf="entropy", gf=True),
# inverse softmax sld
M("d_bin_sld_lr_is", __sld_lr(), "bin", d=True, conf="isoft", ),
M("d_bgf_sld_lr_is", __sld_lr(), "bin", d=True, conf="isoft", gf=True),
M("d_mul_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", ),
M("d_m3w_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", cf=True),
M("d_mgf_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", gf=True),
# grid-search sld (default "grid" search strategy)
G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
]
# Dense (d=True) SLD + RBF-SVC methods; names carry a "d_" prefix.
# Every row calls `__sld_rbf()` so each method gets its own quantifier instance.
# Name prefixes after "d_": bin = "bin" estimator, bgf = "bin" with gf=True,
#                mul = "mul" estimator, m3w = "mul" with cf=True,
#                mgf = "mul" with gf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = search variant (G) using the "sld_rbf" grid with the
#                      "spider" search strategy.
__dense_sld_rbf_set = [
M("d_bin_sld_rbf", __sld_rbf(), "bin", d=True, ),
M("d_bgf_sld_rbf", __sld_rbf(), "bin", d=True, gf=True),
M("d_mul_sld_rbf", __sld_rbf(), "mul", d=True, ),
M("d_m3w_sld_rbf", __sld_rbf(), "mul", d=True, cf=True),
M("d_mgf_sld_rbf", __sld_rbf(), "mul", d=True, gf=True),
# max_conf + entropy sld
M("d_bin_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ),
M("d_bgf_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"], gf=True),
M("d_mul_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ),
M("d_m3w_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True),
M("d_mgf_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], gf=True),
# max_conf sld
M("d_bin_sld_rbf_mc", __sld_rbf(), "bin", d=True, conf="max_conf", ),
M("d_bgf_sld_rbf_mc", __sld_rbf(), "bin", d=True, conf="max_conf", gf=True),
M("d_mul_sld_rbf_mc", __sld_rbf(), "mul", d=True, conf="max_conf", ),
M("d_m3w_sld_rbf_mc", __sld_rbf(), "mul", d=True, conf="max_conf", cf=True),
M("d_mgf_sld_rbf_mc", __sld_rbf(), "mul", d=True, conf="max_conf", gf=True),
# entropy sld
M("d_bin_sld_rbf_ne", __sld_rbf(), "bin", d=True, conf="entropy", ),
M("d_bgf_sld_rbf_ne", __sld_rbf(), "bin", d=True, conf="entropy", gf=True),
M("d_mul_sld_rbf_ne", __sld_rbf(), "mul", d=True, conf="entropy", ),
M("d_m3w_sld_rbf_ne", __sld_rbf(), "mul", d=True, conf="entropy", cf=True),
M("d_mgf_sld_rbf_ne", __sld_rbf(), "mul", d=True, conf="entropy", gf=True),
# inverse softmax sld
M("d_bin_sld_rbf_is", __sld_rbf(), "bin", d=True, conf="isoft", ),
M("d_bgf_sld_rbf_is", __sld_rbf(), "bin", d=True, conf="isoft", gf=True),
M("d_mul_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", ),
M("d_m3w_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", cf=True),
M("d_mgf_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", gf=True),
# spider-search sld
G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", ),
G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", gf=True),
G("d_mul_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", ),
G("d_m3w_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", cf=True),
G("d_mgf_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", gf=True),
]
# KDEy + LogisticRegression methods. Every row calls `__kde_lr()` so each
# method gets its own quantifier instance. This table has no gf=True rows.
# Name prefixes: bin = "bin" estimator, mul = "mul" estimator,
#                m3w = "mul" with cf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = search variant (G) using the "kde_lr" grid with the
#                      "spider" search strategy.
__kde_lr_set = [
# base kde
M("bin_kde_lr", __kde_lr(), "bin" ),
M("mul_kde_lr", __kde_lr(), "mul" ),
M("m3w_kde_lr", __kde_lr(), "mul", cf=True),
# max_conf + entropy kde
M("bin_kde_lr_c", __kde_lr(), "bin", conf=["max_conf", "entropy"] ),
M("mul_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"] ),
M("m3w_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
# max_conf kde
M("bin_kde_lr_mc", __kde_lr(), "bin", conf="max_conf", ),
M("mul_kde_lr_mc", __kde_lr(), "mul", conf="max_conf", ),
M("m3w_kde_lr_mc", __kde_lr(), "mul", conf="max_conf", cf=True),
# entropy kde
M("bin_kde_lr_ne", __kde_lr(), "bin", conf="entropy", ),
M("mul_kde_lr_ne", __kde_lr(), "mul", conf="entropy", ),
M("m3w_kde_lr_ne", __kde_lr(), "mul", conf="entropy", cf=True),
# inverse softmax kde
M("bin_kde_lr_is", __kde_lr(), "bin", conf="isoft", ),
M("mul_kde_lr_is", __kde_lr(), "mul", conf="isoft", ),
M("m3w_kde_lr_is", __kde_lr(), "mul", conf="isoft", cf=True),
# spider-search kde
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
]
# Dense (d=True) KDEy + LogisticRegression methods; names carry a "d_" prefix.
# Every row calls `__kde_lr()` so each method gets its own quantifier
# instance. This table has no gf=True rows.
# Name prefixes after "d_": bin = "bin" estimator, mul = "mul" estimator,
#                m3w = "mul" with cf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = search variant (G) using the "kde_lr" grid with the
#                      "spider" search strategy.
__dense_kde_lr_set = [
# base kde
M("d_bin_kde_lr", __kde_lr(), "bin", d=True, ),
M("d_mul_kde_lr", __kde_lr(), "mul", d=True, ),
M("d_m3w_kde_lr", __kde_lr(), "mul", d=True, cf=True),
# max_conf + entropy kde
M("d_bin_kde_lr_c", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy"] ),
M("d_mul_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"] ),
M("d_m3w_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True),
# max_conf kde
M("d_bin_kde_lr_mc", __kde_lr(), "bin", d=True, conf="max_conf", ),
M("d_mul_kde_lr_mc", __kde_lr(), "mul", d=True, conf="max_conf", ),
M("d_m3w_kde_lr_mc", __kde_lr(), "mul", d=True, conf="max_conf", cf=True),
# entropy kde
M("d_bin_kde_lr_ne", __kde_lr(), "bin", d=True, conf="entropy", ),
M("d_mul_kde_lr_ne", __kde_lr(), "mul", d=True, conf="entropy", ),
M("d_m3w_kde_lr_ne", __kde_lr(), "mul", d=True, conf="entropy", cf=True),
# inverse softmax kde
M("d_bin_kde_lr_is", __kde_lr(), "bin", d=True, conf="isoft", ),
M("d_mul_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", ),
M("d_m3w_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", cf=True),
# spider-search kde
G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="spider" ),
G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider" ),
G("d_m3w_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider", cf=True),
]
# Dense (d=True) KDEy + RBF-SVC methods; names carry a "d_" prefix.
# Every row calls `__kde_rbf()` so each method gets its own quantifier
# instance. This table has no gf=True rows.
# Name prefixes after "d_": bin = "bin" estimator, mul = "mul" estimator,
#                m3w = "mul" with cf=True.
# Name suffixes: _c = conf ["max_conf", "entropy"], _mc = conf "max_conf",
#                _ne = conf "entropy", _is = conf "isoft",
#                _gs = search variant (G) using the "kde_rbf" grid with the
#                      "spider" search strategy.
__dense_kde_rbf_set = [
# base kde
M("d_bin_kde_rbf", __kde_rbf(), "bin", d=True, ),
M("d_mul_kde_rbf", __kde_rbf(), "mul", d=True, ),
M("d_m3w_kde_rbf", __kde_rbf(), "mul", d=True, cf=True),
# max_conf + entropy kde
M("d_bin_kde_rbf_c", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ),
M("d_mul_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ),
M("d_m3w_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True),
# max_conf kde
M("d_bin_kde_rbf_mc", __kde_rbf(), "bin", d=True, conf="max_conf", ),
M("d_mul_kde_rbf_mc", __kde_rbf(), "mul", d=True, conf="max_conf", ),
M("d_m3w_kde_rbf_mc", __kde_rbf(), "mul", d=True, conf="max_conf", cf=True),
# entropy kde
M("d_bin_kde_rbf_ne", __kde_rbf(), "bin", d=True, conf="entropy", ),
M("d_mul_kde_rbf_ne", __kde_rbf(), "mul", d=True, conf="entropy", ),
M("d_m3w_kde_rbf_ne", __kde_rbf(), "mul", d=True, conf="entropy", cf=True),
# inverse softmax kde
M("d_bin_kde_rbf_is", __kde_rbf(), "bin", d=True, conf="isoft", ),
M("d_mul_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", ),
M("d_m3w_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", cf=True),
# spider-search kde
G("d_bin_kde_rbf_gs", __kde_rbf(), "bin", d=True, pg="kde_rbf", search="spider" ),
G("d_mul_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider" ),
G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True),
]
# fmt: on
# Flatten every method table, in declaration order, into one list.
__methods_set = [
    *__sld_lr_set,
    *__dense_sld_lr_set,
    *__dense_sld_rbf_set,
    *__kde_lr_set,
    *__dense_kde_lr_set,
    *__dense_kde_rbf_set,
]
# Lookup table from method name to method; if two entries shared a name,
# the later one would replace the earlier one.
_methods = dict((method.name, method) for method in __methods_set)