From feb9e0a59b6d694cb06697f6de71d937d0e0609b Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi <lorenzo.volpi@outlook.com>
Date: Sat, 3 Feb 2024 12:27:54 +0100
Subject: [PATCH] tesi updated

---
 conf.yaml                      | 34 ++++++++++++++++--------
 copy_res.sh                    |  8 +++---
 qcdash/app.py                  | 25 +++++++++++++++---
 quacc/evaluation/estimators.py | 30 +++++++++++++++++++++
 quacc/plot/plot.py             | 29 +++++++++++++++-----
 quacc/plot/plotly.py           | 23 ++++++++++++++++
 selected_gs.py                 | 48 ++++++++++++++++++++++++++++++++++
 7 files changed, 173 insertions(+), 24 deletions(-)
 create mode 100644 selected_gs.py

diff --git a/conf.yaml b/conf.yaml
index 4dbbccd..f21d1aa 100644
--- a/conf.yaml
+++ b/conf.yaml
@@ -72,13 +72,13 @@ test_conf: &test_conf
 main:
   confs: &main_confs
     - DATASET_NAME: imdb
+  other_confs:
     - DATASET_NAME: rcv1
       DATASET_TARGET: CCAT
     - DATASET_NAME: rcv1
       DATASET_TARGET: GCAT
     - DATASET_NAME: rcv1
       DATASET_TARGET: MCAT
-  other_confs:
 
 sld_lr_conf: &sld_lr_conf
 
@@ -423,22 +423,34 @@ timing_conf: &timing_conf
       - bin_kde_lr_a
       - mul_kde_lr_a
       - m3w_kde_lr_a
+      - doc 
+      - atc_mc
+      - rca
+      - rca_star
+      - mandoline
+      - naive
+    N_JOBS: 1
+    PROTOCOL_REPEATS: 1
+
+  confs: *main_confs
+
+timing_gs_conf: &timing_gs_conf
+  global:
+    METRICS:
+      - acc
+      - f1
+    OUT_DIR_NAME: output/timing_gs
+    DATASET_N_PREVS: 1
+    COMP_ESTIMATORS:
       - bin_sld_lr_gs
       - mul_sld_lr_gs
       - m3w_sld_lr_gs
       - bin_kde_lr_gs
       - mul_kde_lr_gs
       - m3w_kde_lr_gs
-      - doc 
-      - atc_mc
-      - rca
-      - rca_star
-      - mandoline
-    N_JOBS: 1
-    PROTOCOL_N_PREVS: 1,
-    PROTOCOL_REPEATS: 1,
-    SAMPLE_SIZE: 1000,
+    N_JOBS: -1
+    PROTOCOL_REPEATS: 1
 
   confs: *main_confs
 
-exec: *baselines_conf
+exec: *timing_gs_conf
diff --git a/copy_res.sh b/copy_res.sh
index 1eb0d4e..8418a84 100755
--- a/copy_res.sh
+++ b/copy_res.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
 # scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
-# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
-scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
+scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
 
 # scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
-# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
-scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+# scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
diff --git a/qcdash/app.py b/qcdash/app.py
index 9bc9486..8fae568 100644
--- a/qcdash/app.py
+++ b/qcdash/app.py
@@ -13,7 +13,7 @@ from dash import Dash, Input, Output, State, callback, ctx, dash_table, dcc, htm
 from dash.dash_table.Format import Align, Format, Scheme
 
 from quacc import plot
-from quacc.evaluation.estimators import CE
+from quacc.evaluation.estimators import CE, _renames
 from quacc.evaluation.report import CompReport, DatasetReport
 from quacc.evaluation.stats import wilcoxon
 
@@ -26,6 +26,23 @@ def _get_prev_str(prev: np.ndarray):
     return str(tuple(np.around(prev, decimals=2)))
 
 
+def rename_estimators(estimators, rev=False):
+    """Map estimator ids to display names by prefix; ``rev`` maps back.
+
+    Names matching no known prefix are returned unchanged.
+    """
+    _rnm = {v: k for k, v in _renames.items()} if rev else _renames
+    # Longest prefix first, so "rca_star" is never shadowed by "rca"
+    # whatever order ``_renames`` happens to be declared in.
+    prefixes = sorted(_rnm, key=len, reverse=True)
+
+    new_estimators = []
+    for c in estimators:
+        old = next((p for p in prefixes if c.startswith(p)), None)
+        new_estimators.append(c if old is None else _rnm[old] + c[len(old) :])
+    return new_estimators
+
+
 def get_datasets(root: str | Path) -> List[DatasetReport]:
     def load_dataset(dataset):
         dataset = Path(dataset)
@@ -153,7 +170,7 @@ def get_DataTable(df, mode):
     columns = {
         c: dict(
             id=c,
-            name=_index_name[mode] if c == "index" else c,
+            name=_index_name[mode] if c == "index" else rename_estimators([c])[0],
             type="numeric",
             format=columns_format,
         )
@@ -412,12 +429,13 @@ def update_estimators(href, dataset, metric, curr_estimators, root):
             old_estimators = json.loads(old_estimators)
         except JSONDecodeError:
             old_estimators = []
+    old_estimators = rename_estimators(old_estimators, rev=True)
     valid_estimators: np.ndarray = dr.data(metric=metric).columns.unique(0).to_numpy()
     new_estimators = valid_estimators[
         np.isin(valid_estimators, old_estimators)
     ].tolist()
     valid_estimators = CE.name.sort(valid_estimators.tolist())
-    return valid_estimators, new_estimators
+    return rename_estimators(valid_estimators), rename_estimators(new_estimators)
 
 
 @callback(
@@ -473,6 +491,7 @@ def update_content(dataset, metric, estimators, view, mode, root):
         quote_via=quote,
     )
     dr = get_dr(root, dataset)
+    estimators = rename_estimators(estimators, rev=True)
     match mode:
         case m if m.endswith("table"):
             df = get_table(
diff --git a/quacc/evaluation/estimators.py b/quacc/evaluation/estimators.py
index ed76a01..6f5ff4d 100644
--- a/quacc/evaluation/estimators.py
+++ b/quacc/evaluation/estimators.py
@@ -78,3 +78,33 @@ class CompEstimator:
 
 
 CE = CompEstimator()
+
+_renames = {  # estimator-id prefix -> display prefix; matched with str.startswith
+    "bin_sld_lr": "(2x2)_SLD_LR",
+    "mul_sld_lr": "(1x4)_SLD_LR",
+    "m3w_sld_lr": "(1x3)_SLD_LR",
+    "d_bin_sld_lr": "d_(2x2)_SLD_LR",
+    "d_mul_sld_lr": "d_(1x4)_SLD_LR",
+    "d_m3w_sld_lr": "d_(1x3)_SLD_LR",
+    "d_bin_sld_rbf": "(2x2)_SLD_RBF",  # NOTE(review): drops the "d_" kept by the d_*_lr entries -- confirm intended
+    "d_mul_sld_rbf": "(1x4)_SLD_RBF",  # NOTE(review): same as above
+    "d_m3w_sld_rbf": "(1x3)_SLD_RBF",  # NOTE(review): same as above
+    "sld_lr": "SLD_LR",
+    "bin_kde_lr": "(2x2)_KDEy_LR",
+    "mul_kde_lr": "(1x4)_KDEy_LR",
+    "m3w_kde_lr": "(1x3)_KDEy_LR",
+    "d_bin_kde_lr": "d_(2x2)_KDEy_LR",
+    "d_mul_kde_lr": "d_(1x4)_KDEy_LR",
+    "d_m3w_kde_lr": "d_(1x3)_KDEy_LR",
+    "bin_cc_lr": "(2x2)_CC_LR",
+    "mul_cc_lr": "(1x4)_CC_LR",
+    "m3w_cc_lr": "(1x3)_CC_LR",
+    "kde_lr": "KDEy_LR",
+    "cc_lr": "CC_LR",
+    "atc_mc": "ATC",
+    "doc": "DoC",
+    "mandoline": "Mandoline",
+    "rca": "RCA",  # also a prefix of "rca_star" below
+    "rca_star": "RCA*",
+    "naive": "Naive",
+}
diff --git a/quacc/plot/plot.py b/quacc/plot/plot.py
index ef88a7c..fa7c082 100644
--- a/quacc/plot/plot.py
+++ b/quacc/plot/plot.py
@@ -39,8 +39,16 @@ def plot_delta(
     else:
         title = f"{_base_title}_{name}_avg_{avg}_{metric}"
 
-    x_label = f"{'test' if avg is None or avg == 'train' else 'train'} prevalence"
-    y_label = f"{metric} error"
+    if avg is None or avg == "train":
+        x_label = "Test Prevalence"
+    else:
+        x_label = "Train Prevalence"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_delta(
         base_prevs,
         columns,
@@ -81,8 +89,12 @@ def plot_diagonal(
     else:
         title = f"diagonal_{name}_{metric}"
 
-    x_label = f"true {metric}"
-    y_label = f"estim. {metric}"
+    if metric == "acc":
+        x_label = "True Vanilla Accuracy"
+        y_label = "Estimated Vanilla Accuracy"
+    else:
+        x_label = f"true {metric}"
+        y_label = f"estim. {metric}"
     fig = backend.plot_diagonal(
         reference,
         columns,
@@ -123,8 +135,13 @@ def plot_shift(
     else:
         title = f"shift_{name}_avg_{metric}"
 
-    x_label = "dataset shift"
-    y_label = f"{metric} error"
+    x_label = "Amount of Prior Probability Shift"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_shift(
         shift_prevs,
         columns,
diff --git a/quacc/plot/plotly.py b/quacc/plot/plotly.py
index d1cbb26..9a62f22 100644
--- a/quacc/plot/plotly.py
+++ b/quacc/plot/plotly.py
@@ -5,6 +5,7 @@ import numpy as np
 import plotly
 import plotly.graph_objects as go
 
+from quacc.evaluation.estimators import _renames
 from quacc.plot.base import BasePlot
 
 
@@ -50,6 +51,7 @@ class PlotlyPlot(BasePlot):
 
     def __init__(self, theme=None):
         self.theme = PlotlyPlot.__themes[theme]
+        self.rename = True
 
     def hex_to_rgb(self, hex: str, t: float | None = None):
         hex = hex.lstrip("#")
@@ -85,6 +87,24 @@ class PlotlyPlot(BasePlot):
     def save_fig(self, fig, base_path, title) -> Path:
         return None
 
+    def rename_plots(self, columns):
+        """Map estimator ids in ``columns`` to their display names.
+
+        Returns ``columns`` untouched when ``self.rename`` is false,
+        otherwise a new ``np.ndarray``; unknown names pass through.
+        """
+        if not self.rename:
+            return columns
+
+        # Longest prefix first, so "rca_star" is never shadowed by "rca"
+        # whatever order ``_renames`` happens to be declared in.
+        prefixes = sorted(_renames, key=len, reverse=True)
+        new_columns = []
+        for c in columns:
+            old = next((p for p in prefixes if c.startswith(p)), None)
+            new_columns.append(c if old is None else _renames[old] + c[len(old) :])
+        return np.array(new_columns)
+
     def plot_delta(
         self,
         base_prevs,
@@ -102,6 +122,7 @@ class PlotlyPlot(BasePlot):
         if isinstance(base_prevs[0], float):
             base_prevs = np.around([(1 - bp, bp) for bp in base_prevs], decimals=4)
         x = [str(tuple(bp)) for bp in base_prevs]
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             color = next(line_colors)
@@ -150,6 +171,7 @@ class PlotlyPlot(BasePlot):
     ) -> go.Figure:
         fig = go.Figure()
         x = reference
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
 
         _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)]))
@@ -211,6 +233,7 @@ class PlotlyPlot(BasePlot):
         fig = go.Figure()
         # x = shift_prevs[:, pos_class]
         x = shift_prevs
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             col_idx = (columns == name).nonzero()[0][0]
diff --git a/selected_gs.py b/selected_gs.py
new file mode 100644
index 0000000..8d00222
--- /dev/null
+++ b/selected_gs.py
@@ -0,0 +1,48 @@
+"""Report how often each grid-search variant has the lowest fit score.
+
+Percentages are computed per dataset over the reports in ``dr.crs``."""
+
+import numpy as np
+
+from quacc.evaluation.report import DatasetReport
+
+datasets = [
+    "imdb/imdb.pickle",
+    "rcv1_CCAT/rcv1_CCAT.pickle",
+    "rcv1_GCAT/rcv1_GCAT.pickle",
+    "rcv1_MCAT/rcv1_MCAT.pickle",
+]
+
+# Grid-search family -> the estimator variants competing inside it.
+gs = {
+    "sld_lr_gs": ["bin_sld_lr_gs", "mul_sld_lr_gs", "m3w_sld_lr_gs"],
+    "kde_lr_gs": ["bin_kde_lr_gs", "mul_kde_lr_gs", "m3w_kde_lr_gs"],
+}
+
+for dst in datasets:
+    dr = DatasetReport.unpickle("output/main/" + dst)
+    print(f"{dst}\n")
+    for name, methods in gs.items():
+        print(f"{name}")
+        # Per report, keep only the fit scores of this family's variants.
+        sel_methods = [
+            {k: v for k, v in cr.fit_scores.items() if k in methods} for cr in dr.crs
+        ]
+
+        # Variant with the lowest fit score in each report. Reports holding
+        # no score for this family are skipped: np.argmin raises ValueError
+        # on an empty sequence.
+        best_methods = [
+            list(ms)[np.argmin(list(ms.values()))] for ms in sel_methods if ms
+        ]
+        m_cnt = np.array([best_methods.count(m) for m in methods])
+        # max(..., 1) keeps the frequencies at 0 instead of dividing by zero
+        # when no report contributed a winner.
+        m_freq = m_cnt / max(len(best_methods), 1)
+
+        for n in methods:
+            print(n, end="\t")
+        print()
+        for v in m_freq:
+            print(f"{v*100:.2f}", end="\t")
+        print("\n\n")