module refactored, methods updated

2023-11-22 19:20:37 +01:00 · 2023-11-22 19:20:37 +01:00 · 97bb7c514a
parent f7b566c4a4
commit 97bb7c514a
1 changed files with 123 additions and 357 deletions
--- a/quacc/evaluation/method.py
+++ b/quacc/evaluation/method.py
@ -1,390 +1,156 @@
-import inspect
-from functools import wraps
-
 import numpy as np
-from quapy.method.aggregative import CC, PACC, SLD
+from quapy.method.aggregative import PACC, SLD
 from quapy.protocol import UPP, AbstractProtocol
 from sklearn.linear_model import LogisticRegression
+from sklearn.svm import LinearSVC

 import quacc as qc
 from quacc.evaluation.report import EvaluationReport
-from quacc.method.model_selection import BQAEgsq, GridSearchAE, MCAEgsq
+from quacc.method.model_selection import GridSearchAE

 from ..method.base import BQAE, MCAE, BaseAccuracyEstimator

-_methods = {}
-_sld_param_grid = {
-    "q__classifier__C": np.logspace(-3, 3, 7),
-    "q__classifier__class_weight": [None, "balanced"],
-    "q__recalib": [None, "bcts"],
-    "confidence": [["max_conf"], ["entropy"], ["max_conf", "entropy"]],
+_param_grid = {
+    "sld": {
+        "q__classifier__C": np.logspace(-3, 3, 7),
+        "q__classifier__class_weight": [None, "balanced"],
+        "q__recalib": [None, "bcts"],
+        "confidence": [["isoft"], ["max_conf", "entropy"]],
+    },
+    "pacc": {
+        "q__classifier__C": np.logspace(-3, 3, 7),
+        "q__classifier__class_weight": [None, "balanced"],
+        "confidence": [["isoft"], ["max_conf", "entropy"]],
+    },
 }
-_pacc_param_grid = {
-    "q__classifier__C": np.logspace(-3, 3, 7),
-    "q__classifier__class_weight": [None, "balanced"],
-    "confidence": [["max_conf", "entropy"]],
-}
-
-
-def method(func):
-    @wraps(func)
-    def wrapper(c_model, validation, protocol):
-        return func(c_model, validation, protocol)
-
-    _methods[func.__name__] = wrapper
-
-    return wrapper


 def evaluation_report(
-    estimator: BaseAccuracyEstimator,
-    protocol: AbstractProtocol,
+    estimator: BaseAccuracyEstimator, protocol: AbstractProtocol, method_name=None
 ) -> EvaluationReport:
-    method_name = inspect.stack()[1].function
+    # method_name = inspect.stack()[1].function
    report = EvaluationReport(name=method_name)
    for sample in protocol():
-        e_sample = estimator.extend(sample)
-        estim_prev = estimator.estimate(e_sample.eX)
-        acc_score = qc.error.acc(estim_prev)
-        f1_score = qc.error.f1(estim_prev)
-        report.append_row(
-            sample.prevalence(),
-            acc_score=acc_score,
-            acc=abs(qc.error.acc(e_sample.prevalence()) - acc_score),
-            f1_score=f1_score,
-            f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
-        )
+        try:
+            e_sample = estimator.extend(sample)
+            estim_prev = estimator.estimate(e_sample.eX)
+            acc_score = qc.error.acc(estim_prev)
+            f1_score = qc.error.f1(estim_prev)
+            report.append_row(
+                sample.prevalence(),
+                acc_score=acc_score,
+                acc=abs(qc.error.acc(e_sample.prevalence()) - acc_score),
+                f1_score=f1_score,
+                f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
+            )
+        except Exception as e:
+            print(f"sample prediction failed for method {method_name}: {e}")
+            report.append_row(
+                sample.prevalence(),
+                acc_score=np.nan,
+                acc=np.nan,
+                f1_score=np.nan,
+                f1=np.nan,
+            )

    return report


-@method
-def bin_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(c_model, SLD(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+class EvaluationMethod:
+    def __init__(self, name, q, est_c, conf=None, cf=False):
+        self.name = name
+        self.__name__ = name
+        self.q = q
+        self.est_c = est_c
+        self.conf = conf
+        self.cf = cf
+
+    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
+        est = self.est_c(
+            c_model,
+            self.q,
+            confidence=self.conf,
+            collapse_false=self.cf,
+        ).fit(validation)
+        return evaluation_report(
+            estimator=est, protocol=protocol, method_name=self.name
+        )


-@method
-def mul_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(c_model, SLD(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+class EvaluationMethodGridSearch(EvaluationMethod):
+    def __init__(self, name, q, est_c, cf=False, pg="sld"):
+        super().__init__(name, q, est_c, cf=cf)
+        self.pg = pg
+
+    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
+        v_train, v_val = validation.split_stratified(0.6, random_state=0)
+        model = self.est_c(c_model, self.q, collapse_false=self.cf)
+        __grid = _param_grid.get(self.pg, {})
+        est = GridSearchAE(
+            model=model,
+            param_grid=__grid,
+            refit=False,
+            protocol=UPP(v_val, repeats=100),
+            verbose=False,
+        ).fit(v_train)
+        return evaluation_report(
+            estimator=est,
+            protocol=protocol,
+            method_name=self.name,
+        )


-@method
-def mul3w_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(c_model, SLD(LogisticRegression()), collapse_false=True).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+M = EvaluationMethod
+G = EvaluationMethodGridSearch


-@method
-def binc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence=["max_conf", "entropy"],
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+def __sld_lr():
+    return SLD(LogisticRegression())


-@method
-def mulc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence=["max_conf", "entropy"],
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+def __sld_lsvc():
+    return SLD(LinearSVC())


-@method
-def mul3wc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence=["max_conf", "entropy"],
-        collapse_false=True,
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+def __pacc_lr():
+    return PACC(LogisticRegression())


-@method
-def binmc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="max_conf",
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+# fmt: off
+__methods_set = [
+    # base sld
+    M("bin_sld",     __sld_lr(),  BQAE                                       ),
+    M("mul_sld",     __sld_lr(),  MCAE                                       ),
+    M("m3w_sld",     __sld_lr(),  MCAE,                               cf=True),
+    # max_conf + entropy sld
+    M("binc_sld",    __sld_lr(),  BQAE, conf=["max_conf", "entropy"]         ),
+    M("mulc_sld",    __sld_lr(),  MCAE, conf=["max_conf", "entropy"]         ),
+    M("m3wc_sld",    __sld_lr(),  MCAE, conf=["max_conf", "entropy"], cf=True),
+    # max_conf sld
+    M("binmc_sld",   __sld_lr(),  BQAE, conf="max_conf",                     ),
+    M("mulmc_sld",   __sld_lr(),  MCAE, conf="max_conf",                     ),
+    M("m3wmc_sld",   __sld_lr(),  MCAE, conf="max_conf",              cf=True),
+    # entropy sld
+    M("binne_sld",   __sld_lr(),  BQAE, conf="entropy",                      ),
+    M("mulne_sld",   __sld_lr(),  MCAE, conf="entropy",                      ),
+    M("m3wne_sld",   __sld_lr(),  MCAE, conf="entropy",               cf=True),
+    # inverse softmax sld
+    M("binis_sld",   __sld_lr(),  BQAE, conf="isoft",                        ),
+    M("mulis_sld",   __sld_lr(),  MCAE, conf="isoft",                        ),
+    M("m3wis_sld",   __sld_lr(),  MCAE, conf="isoft",                 cf=True),
+    # inverse softmax sld
+    M("binis_pacc",  __pacc_lr(), BQAE, conf="isoft",                        ),
+    M("mulis_pacc",  __pacc_lr(), MCAE, conf="isoft",                        ),
+    M("m3wis_pacc",  __pacc_lr(), MCAE, conf="isoft",                 cf=True),
+    # gs sld
+    G("bin_sld_gs",  __sld_lr(),  BQAE, pg="sld"                             ),
+    G("mul_sld_gs",  __sld_lr(),  MCAE, pg="sld"                             ),
+    G("m3w_sld_gs",  __sld_lr(),  MCAE, pg="sld",                     cf=True),
+    # gs pacc
+    G("bin_pacc_gs", __pacc_lr(), BQAE, pg="pacc"                            ),
+    G("mul_pacc_gs", __pacc_lr(), MCAE, pg="pacc"                            ),
+    G("m3w_pacc_gs", __pacc_lr(), MCAE, pg="pacc",                    cf=True),
+]
+# fmt: on

-
-@method
-def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="max_conf",
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul3wmc_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="max_conf",
-        collapse_false=True,
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def binne_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="entropy",
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="entropy",
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul3wne_sld(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        SLD(LogisticRegression()),
-        confidence="entropy",
-        collapse_false=True,
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
-    v_train, v_val = validation.split_stratified(0.6, random_state=0)
-    model = BQAE(c_model, SLD(LogisticRegression()))
-    est = GridSearchAE(
-        model=model,
-        param_grid=_sld_param_grid,
-        refit=False,
-        protocol=UPP(v_val, repeats=100),
-        verbose=True,
-    ).fit(v_train)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
-    v_train, v_val = validation.split_stratified(0.6, random_state=0)
-    model = MCAE(c_model, SLD(LogisticRegression()))
-    est = GridSearchAE(
-        model=model,
-        param_grid=_sld_param_grid,
-        refit=False,
-        protocol=UPP(v_val, repeats=100),
-        verbose=True,
-    ).fit(v_train)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul3w_sld_gs(c_model, validation, protocol) -> EvaluationReport:
-    v_train, v_val = validation.split_stratified(0.6, random_state=0)
-    model = MCAE(c_model, SLD(LogisticRegression()), collapse_false=True)
-    est = GridSearchAE(
-        model=model,
-        param_grid=_sld_param_grid,
-        refit=False,
-        protocol=UPP(v_val, repeats=100),
-        verbose=True,
-    ).fit(v_train)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def bin_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAEgsq(
-        c_model,
-        SLD(LogisticRegression()),
-        param_grid={
-            "classifier__C": np.logspace(-3, 3, 7),
-            "classifier__class_weight": [None, "balanced"],
-            "recalib": [None, "bcts", "vs"],
-        },
-        refit=False,
-        verbose=False,
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAEgsq(
-        c_model,
-        SLD(LogisticRegression()),
-        param_grid={
-            "classifier__C": np.logspace(-3, 3, 7),
-            "classifier__class_weight": [None, "balanced"],
-            "recalib": [None, "bcts", "vs"],
-        },
-        refit=False,
-        verbose=False,
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def bin_pacc(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(c_model, PACC(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul_pacc(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(c_model, PACC(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def binc_pacc(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(
-        c_model,
-        PACC(LogisticRegression()),
-        confidence=["max_conf", "entropy"],
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mulc_pacc(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(
-        c_model,
-        PACC(LogisticRegression()),
-        confidence=["max_conf", "entropy"],
-    ).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
-    v_train, v_val = validation.split_stratified(0.6, random_state=0)
-    model = BQAE(c_model, PACC(LogisticRegression()))
-    est = GridSearchAE(
-        model=model,
-        param_grid=_pacc_param_grid,
-        refit=False,
-        protocol=UPP(v_val, repeats=100),
-        verbose=False,
-    ).fit(v_train)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
-    v_train, v_val = validation.split_stratified(0.6, random_state=0)
-    model = MCAE(c_model, PACC(LogisticRegression()))
-    est = GridSearchAE(
-        model=model,
-        param_grid=_pacc_param_grid,
-        refit=False,
-        protocol=UPP(v_val, repeats=100),
-        verbose=False,
-    ).fit(v_train)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def bin_cc(c_model, validation, protocol) -> EvaluationReport:
-    est = BQAE(c_model, CC(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
-
-
-@method
-def mul_cc(c_model, validation, protocol) -> EvaluationReport:
-    est = MCAE(c_model, CC(LogisticRegression())).fit(validation)
-    return evaluation_report(
-        estimator=est,
-        protocol=protocol,
-    )
+_methods = {m.name: m for m in __methods_set}