211 lines
7.8 KiB
Python
211 lines
7.8 KiB
Python
from dataclasses import dataclass
|
|
from typing import List
|
|
|
|
import numpy as np
|
|
from quapy.method.aggregative import PACC, SLD, BaseQuantifier
|
|
from quapy.protocol import UPP, AbstractProtocol
|
|
from sklearn.linear_model import LogisticRegression
|
|
from sklearn.svm import LinearSVC
|
|
|
|
import quacc as qc
|
|
from quacc.environment import env
|
|
from quacc.evaluation.report import EvaluationReport
|
|
from quacc.method.base import BQAE, MCAE, BaseAccuracyEstimator
|
|
from quacc.method.model_selection import (
|
|
GridSearchAE,
|
|
HalvingSearchAE,
|
|
RandomizedSearchAE,
|
|
SpiderSearchAE,
|
|
)
|
|
from quacc.quantification import KDEy
|
|
|
|
# Hyper-parameter search spaces, keyed by the short grid name that
# EvaluationMethodGridSearch looks up through its `pg` field
# (`_param_grid.get(self.pg, {})`).
# NOTE(review): the "q__" / "q__classifier__" prefixes presumably route the
# value to the wrapped quantifier and to its classifier respectively -- confirm
# against the search implementation in quacc.method.model_selection.
_param_grid = {
    "sld": {
        # 7 log-spaced values from 1e-3 to 1e3.
        "q__classifier__C": np.logspace(-3, 3, 7),
        "q__classifier__class_weight": [None, "balanced"],
        "q__recalib": [None, "bcts"],
        # Disabled alternative: "q__recalib": [None],
        "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
    },
    "pacc": {
        # 7 log-spaced values from 1e-3 to 1e3.
        "q__classifier__C": np.logspace(-3, 3, 7),
        "q__classifier__class_weight": [None, "balanced"],
        "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
    },
    "kde": {
        # 7 log-spaced values from 1e-3 to 1e3.
        "q__classifier__C": np.logspace(-3, 3, 7),
        "q__classifier__class_weight": [None, "balanced"],
        # Disabled alternative: "q__classifier__class_weight": [None],
        # 20 evenly spaced bandwidths between 0.01 and 0.2 (inclusive).
        "q__bandwidth": np.linspace(0.01, 0.2, 20),
        "confidence": [None, ["isoft"]],
        # Disabled alternative: "confidence": [None],
    },
}
|
|
|
|
|
|
def evaluation_report(
    estimator: BaseAccuracyEstimator, protocol: AbstractProtocol, method_name=None
) -> EvaluationReport:
    """Evaluate ``estimator`` on every sample produced by ``protocol``.

    For each sample the estimator extends it, estimates a prevalence on the
    extended sample's ``eX`` and the resulting accuracy / F1 values are
    compared (absolute difference) with the ones obtained from the extended
    sample's own prevalence.  One row per sample is appended to the report.

    Args:
        estimator: fitted accuracy estimator exposing ``extend`` and
            ``estimate``.
        protocol: callable yielding the samples to evaluate.
        method_name: name given to the report and used in failure messages.

    Returns:
        The populated ``EvaluationReport``.  A sample whose evaluation raises
        is reported on stdout and recorded as a row of NaNs, so that one
        failing sample does not abort the whole run.
    """
    report = EvaluationReport(name=method_name)
    # Placeholder metrics stored for samples that could not be evaluated.
    nan_metrics = dict.fromkeys(("acc_score", "acc", "f1_score", "f1"), np.nan)

    for sample in protocol():
        try:
            extended = estimator.extend(sample)
            predicted_prev = estimator.estimate(extended.eX)
            est_acc = qc.error.acc(predicted_prev)
            est_f1 = qc.error.f1(predicted_prev)

            sample_prev = sample.prevalence()
            # Absolute gap between the score derived from the extended
            # sample's own prevalence and the estimated one.
            acc_gap = abs(qc.error.acc(extended.prevalence()) - est_acc)
            f1_gap = abs(qc.error.f1(extended.prevalence()) - est_f1)

            report.append_row(
                sample_prev,
                acc_score=est_acc,
                acc=acc_gap,
                f1_score=est_f1,
                f1=f1_gap,
            )
        except Exception as e:
            # Best effort: keep the row count aligned with the protocol.
            print(f"sample prediction failed for method {method_name}: {e}")
            report.append_row(sample.prevalence(), **nan_metrics)

    return report
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class EvaluationMethod:
|
|
name: str
|
|
q: BaseQuantifier
|
|
est_n: str
|
|
conf: List[str] | str = None
|
|
cf: bool = False
|
|
|
|
def get_est(self, c_model):
|
|
match self.est_n:
|
|
case "mul":
|
|
return MCAE(
|
|
c_model,
|
|
self.q,
|
|
confidence=self.conf,
|
|
collapse_false=self.cf,
|
|
)
|
|
case "bin":
|
|
return BQAE(c_model, self.q, confidence=self.conf)
|
|
|
|
def __call__(self, c_model, validation, protocol) -> EvaluationReport:
|
|
est = self.get_est(c_model).fit(validation)
|
|
return evaluation_report(
|
|
estimator=est, protocol=protocol, method_name=self.name
|
|
)
|
|
|
|
|
|
@dataclass(frozen=True)
class EvaluationMethodGridSearch(EvaluationMethod):
    """Evaluation method whose estimator is tuned by a parameter search.

    The validation data is split in two: the first part fits the search, the
    second part feeds the UPP protocol the search is run with.
    """

    # Key into the module-level ``_param_grid``; unknown keys give an empty grid.
    pg: str = "sld"
    # Search strategy: "spider" selects SpiderSearchAE, anything else GridSearchAE.
    search: str = "grid"

    def get_search(self):
        """Return the search class (not an instance) selected by ``search``."""
        if self.search == "spider":
            return SpiderSearchAE
        # "grid" and every unrecognised value fall back to the plain grid search.
        return GridSearchAE

    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
        """Tune the estimator on ``validation`` and evaluate it on ``protocol``.

        Args:
            c_model: classifier passed to ``get_est``.
            validation: data split (``split_stratified(0.6, ...)``) into the
                part used to fit the search and the part used by its protocol.
            protocol: sample generator passed to ``evaluation_report``.

        Returns:
            The ``EvaluationReport`` named after this method.
        """
        fit_split, search_split = validation.split_stratified(
            0.6, random_state=env._R_SEED
        )
        grid = _param_grid.get(self.pg, {})
        search_cls = self.get_search()
        searcher = search_cls(
            model=self.get_est(c_model),
            param_grid=grid,
            refit=False,
            protocol=UPP(search_split, repeats=100),
            verbose=False,
        )
        fitted = searcher.fit(fit_split)
        return evaluation_report(
            estimator=fitted,
            protocol=protocol,
            method_name=self.name,
        )
|
|
|
|
|
|
# One-letter aliases for the two method classes, used to keep the
# `__methods_set` table in this module compact.
M = EvaluationMethod
G = EvaluationMethodGridSearch
|
|
|
|
|
|
def __sld_lr():
    """Return a new SLD quantifier built on a default LogisticRegression."""
    classifier = LogisticRegression()
    return SLD(classifier)
|
|
|
|
|
|
def __kde_lr():
    """Return a new KDEy quantifier built on a default LogisticRegression.

    The quantifier is seeded with the environment-wide ``env._R_SEED``.
    """
    classifier = LogisticRegression()
    return KDEy(classifier, random_state=env._R_SEED)
|
|
|
|
|
|
def __sld_lsvc():
    """Return a new SLD quantifier built on a default LinearSVC."""
    classifier = LinearSVC()
    return SLD(classifier)
|
|
|
|
|
|
def __pacc_lr():
    """Return a new PACC quantifier built on a default LogisticRegression."""
    classifier = LogisticRegression()
    return PACC(classifier)
|
|
|
|
|
|
# Table of every evaluation method defined by this module.
#
# Naming scheme, as encoded by the entries themselves:
#   prefix  bin -> est_n="bin"; mul -> est_n="mul"; m3w -> est_n="mul", cf=True
#   infix   c   -> conf=["max_conf", "entropy"]; mc -> conf="max_conf";
#           ne  -> conf="entropy";               is -> conf="isoft"
#   suffix  _sld / _kde -> quantifier factory;   _gs -> parameter search (G)
#
# Each entry calls the factory again, so no quantifier instance is shared.
# Formatting is disabled to keep the hand-aligned columns.
# fmt: off
__methods_set = [
    # base sld
    M("bin_sld",    __sld_lr(), "bin"                                         ),
    M("mul_sld",    __sld_lr(), "mul"                                         ),
    M("m3w_sld",    __sld_lr(), "mul",                                 cf=True),
    # max_conf + entropy sld
    M("binc_sld",   __sld_lr(), "bin", conf=["max_conf", "entropy"]           ),
    M("mulc_sld",   __sld_lr(), "mul", conf=["max_conf", "entropy"]           ),
    M("m3wc_sld",   __sld_lr(), "mul", conf=["max_conf", "entropy"],   cf=True),
    # max_conf sld
    M("binmc_sld",  __sld_lr(), "bin", conf="max_conf",                       ),
    M("mulmc_sld",  __sld_lr(), "mul", conf="max_conf",                       ),
    M("m3wmc_sld",  __sld_lr(), "mul", conf="max_conf",                cf=True),
    # entropy sld
    M("binne_sld",  __sld_lr(), "bin", conf="entropy",                        ),
    M("mulne_sld",  __sld_lr(), "mul", conf="entropy",                        ),
    M("m3wne_sld",  __sld_lr(), "mul", conf="entropy",                 cf=True),
    # inverse softmax sld
    M("binis_sld",  __sld_lr(), "bin", conf="isoft",                          ),
    M("mulis_sld",  __sld_lr(), "mul", conf="isoft",                          ),
    M("m3wis_sld",  __sld_lr(), "mul", conf="isoft",                   cf=True),
    # gs sld (parameter search over the "sld" grid, default search strategy)
    G("bin_sld_gs", __sld_lr(), "bin", pg="sld"                               ),
    G("mul_sld_gs", __sld_lr(), "mul", pg="sld"                               ),
    G("m3w_sld_gs", __sld_lr(), "mul", pg="sld",                       cf=True),

    # base kde
    M("bin_kde",    __kde_lr(), "bin"                                         ),
    M("mul_kde",    __kde_lr(), "mul"                                         ),
    M("m3w_kde",    __kde_lr(), "mul",                                 cf=True),
    # max_conf + entropy kde
    M("binc_kde",   __kde_lr(), "bin", conf=["max_conf", "entropy"]           ),
    M("mulc_kde",   __kde_lr(), "mul", conf=["max_conf", "entropy"]           ),
    M("m3wc_kde",   __kde_lr(), "mul", conf=["max_conf", "entropy"],   cf=True),
    # max_conf kde
    M("binmc_kde",  __kde_lr(), "bin", conf="max_conf",                       ),
    M("mulmc_kde",  __kde_lr(), "mul", conf="max_conf",                       ),
    M("m3wmc_kde",  __kde_lr(), "mul", conf="max_conf",                cf=True),
    # entropy kde
    M("binne_kde",  __kde_lr(), "bin", conf="entropy",                        ),
    M("mulne_kde",  __kde_lr(), "mul", conf="entropy",                        ),
    M("m3wne_kde",  __kde_lr(), "mul", conf="entropy",                 cf=True),
    # inverse softmax kde
    M("binis_kde",  __kde_lr(), "bin", conf="isoft",                          ),
    M("mulis_kde",  __kde_lr(), "mul", conf="isoft",                          ),
    M("m3wis_kde",  __kde_lr(), "mul", conf="isoft",                   cf=True),
    # gs kde (parameter search over the "kde" grid, "spider" search strategy)
    G("bin_kde_gs", __kde_lr(), "bin", pg="kde", search="spider"              ),
    G("mul_kde_gs", __kde_lr(), "mul", pg="kde", search="spider"              ),
    G("m3w_kde_gs", __kde_lr(), "mul", pg="kde", search="spider",      cf=True),
]
# fmt: on

# Lookup table from method name to its definition.
_methods = {m.name: m for m in __methods_set}
|