stats updated, naive fixes

2024-02-01 12:24:16 +01:00 · 2024-02-01 12:24:16 +01:00 · 6afe3ddb31
parent 531d22573b
commit 6afe3ddb31
6 changed files with 106 additions and 7 deletions
--- a/conf.yaml
+++ b/conf.yaml
@@ -338,6 +338,43 @@ d_kde_rbf_conf: &d_kde_rbf_conf
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

+cc_lr_conf: &cc_lr_conf
+  global:
+    METRICS: 
+      - acc
+      - f1
+    OUT_DIR_NAME: output/cc_lr
+    DATASET_N_PREVS: 9
+    COMP_ESTIMATORS:
+      # - bin_cc_lr
+      # - mul_cc_lr
+      # - m3w_cc_lr
+      # - bin_cc_lr_c
+      # - mul_cc_lr_c
+      # - m3w_cc_lr_c
+      # - bin_cc_lr_mc
+      # - mul_cc_lr_mc
+      # - m3w_cc_lr_mc
+      # - bin_cc_lr_ne
+      # - mul_cc_lr_ne
+      # - m3w_cc_lr_ne
+      # - bin_cc_lr_is
+      # - mul_cc_lr_is
+      # - m3w_cc_lr_is
+      # - bin_cc_lr_a
+      # - mul_cc_lr_a
+      # - m3w_cc_lr_a
+      - bin_cc_lr_gs
+      - mul_cc_lr_gs
+      - m3w_cc_lr_gs
+    N_JOBS: -2
+
+  confs: *main_confs
+  other_confs:
+    - DATASET_NAME: imdb
+    - DATASET_NAME: rcv1
+      DATASET_TARGET: CCAT
+
 baselines_conf: &baselines_conf
  global:
    METRICS: 
--- a/copy_res.sh
+++ b/copy_res.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
+scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
+
+# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
--- a/quacc/evaluation/baseline.py
+++ b/quacc/evaluation/baseline.py
@@ -85,14 +85,14 @@ def naive(
    report = EvaluationReport(name="naive")
    for test in protocol():
        test_preds = c_model_predict(test.X)
-        acc_score = metrics.accuracy_score(test.y, test_preds)
-        f1_score = metrics.f1_score(test.y, test_preds, average=f1_average)
-        meta_acc = abs(val_acc - acc_score)
-        meta_f1 = abs(val_f1 - f1_score)
+        test_acc = metrics.accuracy_score(test.y, test_preds)
+        test_f1 = metrics.f1_score(test.y, test_preds, average=f1_average)
+        meta_acc = abs(val_acc - test_acc)
+        meta_f1 = abs(val_f1 - test_f1)
        report.append_row(
            test.prevalence(),
-            acc_score=acc_score,
-            f1_score=f1_score,
+            acc_score=val_acc,
+            f1_score=val_f1,
            acc=meta_acc,
            f1=meta_f1,
        )
--- a/quacc/evaluation/method.py
+++ b/quacc/evaluation/method.py
@@ -3,7 +3,7 @@ from typing import Callable, List, Union

 import numpy as np
 from matplotlib.pylab import rand
-from quapy.method.aggregative import PACC, SLD, BaseQuantifier
+from quapy.method.aggregative import CC, PACC, SLD, BaseQuantifier
 from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC, LinearSVC
@@ -53,6 +53,17 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__class_weight": [None, "balanced"],
                "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
            }
+        case "cc_lr":
+            return {
+                "q__classifier__C": np.logspace(-3, 3, 7),
+                "q__classifier__class_weight": [None, "balanced"],
+                "confidence": [
+                    None,
+                    ["isoft"],
+                    ["max_conf", "entropy"],
+                    ["max_conf", "entropy", "isoft"],
+                ],
+            }
        case "kde_lr":
            return {
                "q__classifier__C": np.logspace(-3, 3, 7),
@@ -219,6 +230,10 @@ def __pacc_lr():
    return PACC(LogisticRegression())


+def __cc_lr():
+    return CC(LogisticRegression())
+
+
 # fmt: off

 __sld_lr_set = [
@@ -448,6 +463,37 @@ __dense_kde_rbf_set = [
    G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True),
 ]

+__cc_lr_set = [
+    # base cc
+    M("bin_cc_lr",    __cc_lr(), "bin"                                       ),
+    M("mul_cc_lr",    __cc_lr(), "mul"                                       ),
+    M("m3w_cc_lr",    __cc_lr(), "mul",                               cf=True),
+    # max_conf + entropy cc
+    M("bin_cc_lr_c",  __cc_lr(), "bin", conf=["max_conf", "entropy"]         ),
+    M("mul_cc_lr_c",  __cc_lr(), "mul", conf=["max_conf", "entropy"]         ),
+    M("m3w_cc_lr_c",  __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
+    # max_conf cc
+    M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf",                     ),
+    M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf",                     ),
+    M("m3w_cc_lr_mc", __cc_lr(), "mul", conf="max_conf",              cf=True),
+    # entropy cc
+    M("bin_cc_lr_ne", __cc_lr(), "bin", conf="entropy",                      ),
+    M("mul_cc_lr_ne", __cc_lr(), "mul", conf="entropy",                      ),
+    M("m3w_cc_lr_ne", __cc_lr(), "mul", conf="entropy",               cf=True),
+    # inverse softmax cc
+    M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft",                        ),
+    M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft",                        ),
+    M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft",                 cf=True),
+    # cc all
+    M("bin_cc_lr_a",  __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"],         ),
+    M("mul_cc_lr_a",  __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"],         ),
+    M("m3w_cc_lr_a",  __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"],  cf=True),
+    # gs cc
+    G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid"         ),
+    G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid"         ),
+    G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True),
+    E("cc_lr_gs"),
+]

 # fmt: on

@@ -458,6 +504,7 @@ __methods_set = (
    + __kde_lr_set
    + __dense_kde_lr_set
    + __dense_kde_rbf_set
+    + __cc_lr_set
    + [E("QuAcc")]
 )

--- a/quacc/evaluation/report.py
+++ b/quacc/evaluation/report.py
@@ -140,6 +140,11 @@ class CompReport:
                "mul_kde_lr_gs",
                "m3w_kde_lr_gs",
            ],
+            "cc_lr_gs": [
+                "bin_cc_lr_gs",
+                "mul_cc_lr_gs",
+                "m3w_cc_lr_gs",
+            ],
            "QuAcc": [
                "bin_sld_lr_gs",
                "mul_sld_lr_gs",
--- a/quacc/evaluation/stats.py
+++ b/quacc/evaluation/stats.py
@@ -25,6 +25,7 @@ def wilcoxon(
 ) -> pd.DataFrame:
    _data = r.data(metric, estimators)

+    _data = _data.dropna(axis=0, how="any")
    _wilcoxon = {}
    for est in _data.columns.unique(0):
        _wilcoxon[est] = [