module refactored, methods updated

This commit is contained in:
Lorenzo Volpi 2023-11-22 19:20:37 +01:00
parent f7b566c4a4
commit 97bb7c514a
1 changed file with 123 additions and 357 deletions

View File

@ -1,48 +1,37 @@
import inspect
from functools import wraps
import numpy as np import numpy as np
from quapy.method.aggregative import CC, PACC, SLD from quapy.method.aggregative import PACC, SLD
from quapy.protocol import UPP, AbstractProtocol from quapy.protocol import UPP, AbstractProtocol
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
import quacc as qc import quacc as qc
from quacc.evaluation.report import EvaluationReport from quacc.evaluation.report import EvaluationReport
from quacc.method.model_selection import BQAEgsq, GridSearchAE, MCAEgsq from quacc.method.model_selection import GridSearchAE
from ..method.base import BQAE, MCAE, BaseAccuracyEstimator from ..method.base import BQAE, MCAE, BaseAccuracyEstimator
_methods = {} _param_grid = {
_sld_param_grid = { "sld": {
"q__classifier__C": np.logspace(-3, 3, 7), "q__classifier__C": np.logspace(-3, 3, 7),
"q__classifier__class_weight": [None, "balanced"], "q__classifier__class_weight": [None, "balanced"],
"q__recalib": [None, "bcts"], "q__recalib": [None, "bcts"],
"confidence": [["max_conf"], ["entropy"], ["max_conf", "entropy"]], "confidence": [["isoft"], ["max_conf", "entropy"]],
} },
_pacc_param_grid = { "pacc": {
"q__classifier__C": np.logspace(-3, 3, 7), "q__classifier__C": np.logspace(-3, 3, 7),
"q__classifier__class_weight": [None, "balanced"], "q__classifier__class_weight": [None, "balanced"],
"confidence": [["max_conf", "entropy"]], "confidence": [["isoft"], ["max_conf", "entropy"]],
},
} }
def method(func):
@wraps(func)
def wrapper(c_model, validation, protocol):
return func(c_model, validation, protocol)
_methods[func.__name__] = wrapper
return wrapper
def evaluation_report( def evaluation_report(
estimator: BaseAccuracyEstimator, estimator: BaseAccuracyEstimator, protocol: AbstractProtocol, method_name=None
protocol: AbstractProtocol,
) -> EvaluationReport: ) -> EvaluationReport:
method_name = inspect.stack()[1].function # method_name = inspect.stack()[1].function
report = EvaluationReport(name=method_name) report = EvaluationReport(name=method_name)
for sample in protocol(): for sample in protocol():
try:
e_sample = estimator.extend(sample) e_sample = estimator.extend(sample)
estim_prev = estimator.estimate(e_sample.eX) estim_prev = estimator.estimate(e_sample.eX)
acc_score = qc.error.acc(estim_prev) acc_score = qc.error.acc(estim_prev)
@ -54,297 +43,52 @@ def evaluation_report(
f1_score=f1_score, f1_score=f1_score,
f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score), f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
) )
except Exception as e:
print(f"sample prediction failed for method {method_name}: {e}")
report.append_row(
sample.prevalence(),
acc_score=np.nan,
acc=np.nan,
f1_score=np.nan,
f1=np.nan,
)
return report return report
@method class EvaluationMethod:
def bin_sld(c_model, validation, protocol) -> EvaluationReport: def __init__(self, name, q, est_c, conf=None, cf=False):
est = BQAE(c_model, SLD(LogisticRegression())).fit(validation) self.name = name
return evaluation_report( self.__name__ = name
estimator=est, self.q = q
protocol=protocol, self.est_c = est_c
) self.conf = conf
self.cf = cf
def __call__(self, c_model, validation, protocol) -> EvaluationReport:
@method est = self.est_c(
def mul_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(c_model, SLD(LogisticRegression())).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul3w_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(c_model, SLD(LogisticRegression()), collapse_false=True).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def binc_sld(c_model, validation, protocol) -> EvaluationReport:
est = BQAE(
c_model, c_model,
SLD(LogisticRegression()), self.q,
confidence=["max_conf", "entropy"], confidence=self.conf,
collapse_false=self.cf,
).fit(validation) ).fit(validation)
return evaluation_report( return evaluation_report(
estimator=est, estimator=est, protocol=protocol, method_name=self.name
protocol=protocol,
) )
@method class EvaluationMethodGridSearch(EvaluationMethod):
def mulc_sld(c_model, validation, protocol) -> EvaluationReport: def __init__(self, name, q, est_c, cf=False, pg="sld"):
est = MCAE( super().__init__(name, q, est_c, cf=cf)
c_model, self.pg = pg
SLD(LogisticRegression()),
confidence=["max_conf", "entropy"],
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
def __call__(self, c_model, validation, protocol) -> EvaluationReport:
@method
def mul3wc_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
SLD(LogisticRegression()),
confidence=["max_conf", "entropy"],
collapse_false=True,
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def binmc_sld(c_model, validation, protocol) -> EvaluationReport:
est = BQAE(
c_model,
SLD(LogisticRegression()),
confidence="max_conf",
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
SLD(LogisticRegression()),
confidence="max_conf",
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul3wmc_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
SLD(LogisticRegression()),
confidence="max_conf",
collapse_false=True,
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def binne_sld(c_model, validation, protocol) -> EvaluationReport:
est = BQAE(
c_model,
SLD(LogisticRegression()),
confidence="entropy",
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
SLD(LogisticRegression()),
confidence="entropy",
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul3wne_sld(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
SLD(LogisticRegression()),
confidence="entropy",
collapse_false=True,
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
v_train, v_val = validation.split_stratified(0.6, random_state=0) v_train, v_val = validation.split_stratified(0.6, random_state=0)
model = BQAE(c_model, SLD(LogisticRegression())) model = self.est_c(c_model, self.q, collapse_false=self.cf)
__grid = _param_grid.get(self.pg, {})
est = GridSearchAE( est = GridSearchAE(
model=model, model=model,
param_grid=_sld_param_grid, param_grid=__grid,
refit=False,
protocol=UPP(v_val, repeats=100),
verbose=True,
).fit(v_train)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
v_train, v_val = validation.split_stratified(0.6, random_state=0)
model = MCAE(c_model, SLD(LogisticRegression()))
est = GridSearchAE(
model=model,
param_grid=_sld_param_grid,
refit=False,
protocol=UPP(v_val, repeats=100),
verbose=True,
).fit(v_train)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul3w_sld_gs(c_model, validation, protocol) -> EvaluationReport:
v_train, v_val = validation.split_stratified(0.6, random_state=0)
model = MCAE(c_model, SLD(LogisticRegression()), collapse_false=True)
est = GridSearchAE(
model=model,
param_grid=_sld_param_grid,
refit=False,
protocol=UPP(v_val, repeats=100),
verbose=True,
).fit(v_train)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def bin_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
est = BQAEgsq(
c_model,
SLD(LogisticRegression()),
param_grid={
"classifier__C": np.logspace(-3, 3, 7),
"classifier__class_weight": [None, "balanced"],
"recalib": [None, "bcts", "vs"],
},
refit=False,
verbose=False,
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
est = MCAEgsq(
c_model,
SLD(LogisticRegression()),
param_grid={
"classifier__C": np.logspace(-3, 3, 7),
"classifier__class_weight": [None, "balanced"],
"recalib": [None, "bcts", "vs"],
},
refit=False,
verbose=False,
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def bin_pacc(c_model, validation, protocol) -> EvaluationReport:
est = BQAE(c_model, PACC(LogisticRegression())).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mul_pacc(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(c_model, PACC(LogisticRegression())).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def binc_pacc(c_model, validation, protocol) -> EvaluationReport:
est = BQAE(
c_model,
PACC(LogisticRegression()),
confidence=["max_conf", "entropy"],
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def mulc_pacc(c_model, validation, protocol) -> EvaluationReport:
est = MCAE(
c_model,
PACC(LogisticRegression()),
confidence=["max_conf", "entropy"],
).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method
def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
v_train, v_val = validation.split_stratified(0.6, random_state=0)
model = BQAE(c_model, PACC(LogisticRegression()))
est = GridSearchAE(
model=model,
param_grid=_pacc_param_grid,
refit=False, refit=False,
protocol=UPP(v_val, repeats=100), protocol=UPP(v_val, repeats=100),
verbose=False, verbose=False,
@ -352,39 +96,61 @@ def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
return evaluation_report( return evaluation_report(
estimator=est, estimator=est,
protocol=protocol, protocol=protocol,
method_name=self.name,
) )
@method M = EvaluationMethod
def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport: G = EvaluationMethodGridSearch
v_train, v_val = validation.split_stratified(0.6, random_state=0)
model = MCAE(c_model, PACC(LogisticRegression()))
est = GridSearchAE(
model=model,
param_grid=_pacc_param_grid,
refit=False,
protocol=UPP(v_val, repeats=100),
verbose=False,
).fit(v_train)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method def __sld_lr():
def bin_cc(c_model, validation, protocol) -> EvaluationReport: return SLD(LogisticRegression())
est = BQAE(c_model, CC(LogisticRegression())).fit(validation)
return evaluation_report(
estimator=est,
protocol=protocol,
)
@method def __sld_lsvc():
def mul_cc(c_model, validation, protocol) -> EvaluationReport: return SLD(LinearSVC())
est = MCAE(c_model, CC(LogisticRegression())).fit(validation)
return evaluation_report(
estimator=est, def __pacc_lr():
protocol=protocol, return PACC(LogisticRegression())
)
# Table of the available evaluation methods.
# M is EvaluationMethod and G is EvaluationMethodGridSearch. Every entry is
# built as (name, quantifier, estimator class, options):
#   conf -> forwarded as `confidence` to the estimator class (M only)
#   cf   -> forwarded as `collapse_false` to the estimator class
#   pg   -> key looked up in `_param_grid` to pick the search grid (G only)
# The quantifier factory is called once per entry, so entries do not share a
# quantifier instance.
# fmt: off
__methods_set = [
# base sld
M("bin_sld", __sld_lr(), BQAE ),
M("mul_sld", __sld_lr(), MCAE ),
M("m3w_sld", __sld_lr(), MCAE, cf=True),
# max_conf + entropy sld
M("binc_sld", __sld_lr(), BQAE, conf=["max_conf", "entropy"] ),
M("mulc_sld", __sld_lr(), MCAE, conf=["max_conf", "entropy"] ),
M("m3wc_sld", __sld_lr(), MCAE, conf=["max_conf", "entropy"], cf=True),
# max_conf sld
M("binmc_sld", __sld_lr(), BQAE, conf="max_conf", ),
M("mulmc_sld", __sld_lr(), MCAE, conf="max_conf", ),
M("m3wmc_sld", __sld_lr(), MCAE, conf="max_conf", cf=True),
# entropy sld
M("binne_sld", __sld_lr(), BQAE, conf="entropy", ),
M("mulne_sld", __sld_lr(), MCAE, conf="entropy", ),
M("m3wne_sld", __sld_lr(), MCAE, conf="entropy", cf=True),
# inverse softmax sld
M("binis_sld", __sld_lr(), BQAE, conf="isoft", ),
M("mulis_sld", __sld_lr(), MCAE, conf="isoft", ),
M("m3wis_sld", __sld_lr(), MCAE, conf="isoft", cf=True),
# inverse softmax pacc
M("binis_pacc", __pacc_lr(), BQAE, conf="isoft", ),
M("mulis_pacc", __pacc_lr(), MCAE, conf="isoft", ),
M("m3wis_pacc", __pacc_lr(), MCAE, conf="isoft", cf=True),
# gs sld (grid search over the "sld" parameter grid)
G("bin_sld_gs", __sld_lr(), BQAE, pg="sld" ),
G("mul_sld_gs", __sld_lr(), MCAE, pg="sld" ),
G("m3w_sld_gs", __sld_lr(), MCAE, pg="sld", cf=True),
# gs pacc (grid search over the "pacc" parameter grid)
G("bin_pacc_gs", __pacc_lr(), BQAE, pg="pacc" ),
G("mul_pacc_gs", __pacc_lr(), MCAE, pg="pacc" ),
G("m3w_pacc_gs", __pacc_lr(), MCAE, pg="pacc", cf=True),
]
# fmt: on
# Lookup from method name to its callable; if two entries ever shared a name,
# the later one would replace the earlier one here.
_methods = {m.name: m for m in __methods_set}