From 6afe3ddb314976c1a836a881c846d4f3e56a20de Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Thu, 1 Feb 2024 12:24:16 +0100
Subject: [PATCH] Add cc_lr estimators, fix naive report, drop NaNs in stats

---
 conf.yaml                    | 37 +++++++++++++++++++++++++++
 copy_res.sh                  |  9 +++++++
 quacc/evaluation/baseline.py | 12 ++++-----
 quacc/evaluation/method.py   | 49 +++++++++++++++++++++++++++++++++++-
 quacc/evaluation/report.py   |  5 ++++
 quacc/evaluation/stats.py    |  1 +
 6 files changed, 106 insertions(+), 7 deletions(-)
 create mode 100755 copy_res.sh

diff --git a/conf.yaml b/conf.yaml
index e6e7f4b..4dbbccd 100644
--- a/conf.yaml
+++ b/conf.yaml
@@ -338,6 +338,43 @@ d_kde_rbf_conf: &d_kde_rbf_conf
     - DATASET_NAME: rcv1
       DATASET_TARGET: CCAT
 
+cc_lr_conf: &cc_lr_conf  # run profile for the CC + logistic-regression (cc_lr) estimators
+  global:
+    METRICS: 
+      - acc
+      - f1
+    OUT_DIR_NAME: output/cc_lr  # copy_res.sh syncs this same directory
+    DATASET_N_PREVS: 9
+    COMP_ESTIMATORS:  # names as registered in __cc_lr_set (quacc/evaluation/method.py)
+      # - bin_cc_lr
+      # - mul_cc_lr
+      # - m3w_cc_lr
+      # - bin_cc_lr_c
+      # - mul_cc_lr_c
+      # - m3w_cc_lr_c
+      # - bin_cc_lr_mc
+      # - mul_cc_lr_mc
+      # - m3w_cc_lr_mc
+      # - bin_cc_lr_ne
+      # - mul_cc_lr_ne
+      # - m3w_cc_lr_ne
+      # - bin_cc_lr_is
+      # - mul_cc_lr_is
+      # - m3w_cc_lr_is
+      # - bin_cc_lr_a
+      # - mul_cc_lr_a
+      # - m3w_cc_lr_a
+      - bin_cc_lr_gs
+      - mul_cc_lr_gs
+      - m3w_cc_lr_gs
+    N_JOBS: -2  # NOTE(review): presumably joblib-style "all cores but one" - confirm against the runner
+
+  confs: *main_confs  # reuses the &main_confs anchor defined elsewhere in conf.yaml
+  other_confs:
+    - DATASET_NAME: imdb
+    - DATASET_NAME: rcv1
+      DATASET_TARGET: CCAT
+
 baselines_conf: &baselines_conf
   global:
     METRICS: 
diff --git a/copy_res.sh b/copy_res.sh
new file mode 100755
index 0000000..1eb0d4e
--- /dev/null
+++ b/copy_res.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Mirror experiment results: download from edge-nd1 into ./output/, then upload them to ilona.
+set -e  # stop at the first failed scp so a failed download never uploads stale results
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
+scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
+# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
diff --git a/quacc/evaluation/baseline.py b/quacc/evaluation/baseline.py
index 436dd47..7af7804 100644
--- a/quacc/evaluation/baseline.py
+++ b/quacc/evaluation/baseline.py
@@ -85,14 +85,14 @@ def naive(
     report = EvaluationReport(name="naive")
     for test in protocol():
         test_preds = c_model_predict(test.X)
-        acc_score = metrics.accuracy_score(test.y, test_preds)
-        f1_score = metrics.f1_score(test.y, test_preds, average=f1_average)
-        meta_acc = abs(val_acc - acc_score)
-        meta_f1 = abs(val_f1 - f1_score)
+        test_acc = metrics.accuracy_score(test.y, test_preds)
+        test_f1 = metrics.f1_score(test.y, test_preds, average=f1_average)
+        meta_acc = abs(val_acc - test_acc)
+        meta_f1 = abs(val_f1 - test_f1)
         report.append_row(
             test.prevalence(),
-            acc_score=acc_score,
-            f1_score=f1_score,
+            acc_score=val_acc,
+            f1_score=val_f1,
             acc=meta_acc,
             f1=meta_f1,
         )
diff --git a/quacc/evaluation/method.py b/quacc/evaluation/method.py
index 9942b65..de1025b 100644
--- a/quacc/evaluation/method.py
+++ b/quacc/evaluation/method.py
@@ -3,7 +3,7 @@ from typing import Callable, List, Union
 
 import numpy as np
 from matplotlib.pylab import rand
-from quapy.method.aggregative import PACC, SLD, BaseQuantifier
+from quapy.method.aggregative import CC, PACC, SLD, BaseQuantifier
 from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC, LinearSVC
@@ -53,6 +53,17 @@ def _param_grid(method, X_fit: np.ndarray):
                 "q__classifier__class_weight": [None, "balanced"],
                 "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
             }
+        case "cc_lr":
+            return {
+                "q__classifier__C": np.logspace(-3, 3, 7),
+                "q__classifier__class_weight": [None, "balanced"],
+                "confidence": [
+                    None,
+                    ["isoft"],
+                    ["max_conf", "entropy"],
+                    ["max_conf", "entropy", "isoft"],
+                ],
+            }
         case "kde_lr":
             return {
                 "q__classifier__C": np.logspace(-3, 3, 7),
@@ -219,6 +230,10 @@ def __pacc_lr():
     return PACC(LogisticRegression())
 
 
+def __cc_lr():  # factory: returns a new quapy CC quantifier wrapping a default LogisticRegression
+    return CC(LogisticRegression())
+
+
 # fmt: off
 
 __sld_lr_set = [
@@ -448,6 +463,37 @@ __dense_kde_rbf_set = [
     G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True),
 ]
 
+__cc_lr_set = [  # CC(LogisticRegression) estimator variants; every entry calls __cc_lr() for its own quantifier
+    # plain CC, no confidence measures passed
+    M("bin_cc_lr",    __cc_lr(), "bin"                                       ),
+    M("mul_cc_lr",    __cc_lr(), "mul"                                       ),
+    M("m3w_cc_lr",    __cc_lr(), "mul",                               cf=True),
+    # CC with both max_conf and entropy confidence measures
+    M("bin_cc_lr_c",  __cc_lr(), "bin", conf=["max_conf", "entropy"]         ),
+    M("mul_cc_lr_c",  __cc_lr(), "mul", conf=["max_conf", "entropy"]         ),
+    M("m3w_cc_lr_c",  __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
+    # CC with max_conf only
+    M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf",                     ),
+    M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf",                     ),
+    M("m3w_cc_lr_mc", __cc_lr(), "mul", conf="max_conf",              cf=True),
+    # CC with entropy only
+    M("bin_cc_lr_ne", __cc_lr(), "bin", conf="entropy",                      ),
+    M("mul_cc_lr_ne", __cc_lr(), "mul", conf="entropy",                      ),
+    M("m3w_cc_lr_ne", __cc_lr(), "mul", conf="entropy",               cf=True),
+    # CC with isoft (inverse softmax) only
+    M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft",                        ),
+    M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft",                        ),
+    M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft",                 cf=True),
+    # CC with all three confidence measures (max_conf, entropy, isoft)
+    M("bin_cc_lr_a",  __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"],         ),
+    M("mul_cc_lr_a",  __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"],         ),
+    M("m3w_cc_lr_a",  __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"],  cf=True),
+    # CC tuned by grid search over the "cc_lr" parameter grid of _param_grid
+    G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid"         ),
+    G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid"         ),
+    G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True),
+    E("cc_lr_gs"),  # NOTE(review): same key CompReport maps to the three *_cc_lr_gs names - presumably a group alias
+]
 
 # fmt: on
 
@@ -458,6 +504,7 @@ __methods_set = (
     + __kde_lr_set
     + __dense_kde_lr_set
     + __dense_kde_rbf_set
+    + __cc_lr_set
     + [E("QuAcc")]
 )
 
diff --git a/quacc/evaluation/report.py b/quacc/evaluation/report.py
index bfe394e..20df414 100644
--- a/quacc/evaluation/report.py
+++ b/quacc/evaluation/report.py
@@ -140,6 +140,11 @@ class CompReport:
                 "mul_kde_lr_gs",
                 "m3w_kde_lr_gs",
             ],
+            "cc_lr_gs": [
+                "bin_cc_lr_gs",
+                "mul_cc_lr_gs",
+                "m3w_cc_lr_gs",
+            ],
             "QuAcc": [
                 "bin_sld_lr_gs",
                 "mul_sld_lr_gs",
diff --git a/quacc/evaluation/stats.py b/quacc/evaluation/stats.py
index 1bf12d6..3ef3365 100644
--- a/quacc/evaluation/stats.py
+++ b/quacc/evaluation/stats.py
@@ -25,6 +25,7 @@ def wilcoxon(
 ) -> pd.DataFrame:
     _data = r.data(metric, estimators)
 
+    _data = _data.dropna(axis=0, how="any")
     _wilcoxon = {}
     for est in _data.columns.unique(0):
         _wilcoxon[est] = [