From 9bc120830979dcea8ead65e345bffd65f8533158 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi
Date: Thu, 4 Apr 2024 17:02:25 +0200
Subject: [PATCH] experiments created, report refactoring started

---
 quacc/experiments/generators.py | 131 ++++++++++++++++++++++++++++++++
 quacc/experiments/report.py     | 101 ++++++++++++++++++++++++
 quacc/experiments/run.py        | 124 ++++++++++++++++++++++++++++++
 quacc/experiments/util.py       |  62 +++++++++++++++
 4 files changed, 418 insertions(+)
 create mode 100644 quacc/experiments/generators.py
 create mode 100644 quacc/experiments/report.py
 create mode 100644 quacc/experiments/run.py
 create mode 100644 quacc/experiments/util.py

diff --git a/quacc/experiments/generators.py b/quacc/experiments/generators.py
new file mode 100644
index 0000000..17f550f
--- /dev/null
+++ b/quacc/experiments/generators.py
@@ -0,0 +1,131 @@
+import os
+
+import numpy as np
+import quapy as qp
+from quapy.data.base import LabelledCollection
+from quapy.data.datasets import (
+    TWITTER_SENTIMENT_DATASETS_TEST,
+    UCI_MULTICLASS_DATASETS,
+)
+from quapy.method.aggregative import EMQ
+from sklearn.linear_model import LogisticRegression
+
+from quacc.dataset import DatasetProvider as DP
+from quacc.error import macrof1_fn, vanilla_acc_fn
+from quacc.experiments.util import getpath
+from quacc.models.base import ClassifierAccuracyPrediction
+from quacc.models.baselines import ATC, DoC
+from quacc.models.cont_table import CAPContingencyTable, ContTableTransferCAP, NaiveCAP
+
+
+def gen_classifiers():
+    param_grid = {"C": np.logspace(-4, 4, 9), "class_weight": ["balanced", None]}
+
+    yield "LR", LogisticRegression()
+    # yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
+    # yield 'NB', GaussianNB()
+    # yield 'SVM(rbf)', SVC()
+    # yield 'SVM(linear)', LinearSVC()
+
+
+def gen_multi_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    for dataset_name in np.setdiff1d(UCI_MULTICLASS_DATASETS, ["wine-quality"]):
+        if only_names:
+            yield dataset_name, None
+        else:
+            yield dataset_name, DP.uci_multiclass(dataset_name)
+
+    # yields the 20 newsgroups dataset
+    if only_names:
+        yield "20news", None
+    else:
+        yield "20news", DP.news20()
+
+    # yields the T1B@LeQua2022 (training) dataset
+    if only_names:
+        yield "T1B-LeQua2022", None
+    else:
+        yield "T1B-LeQua2022", DP.t1b_lequa2022()
+
+
+def gen_tweet_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
+        if only_names:
+            yield dataset_name, None
+        else:
+            yield dataset_name, DP.twitter(dataset_name)
+
+
+def gen_bin_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    if only_names:
+        for dataset_name in ["imdb", "CCAT", "GCAT", "MCAT"]:
+            yield dataset_name, None
+    else:
+        yield "imdb", DP.imdb()
+        for rcv1_name in [
+            "CCAT",
+            "GCAT",
+            "MCAT",
+        ]:
+            yield rcv1_name, DP.rcv1(rcv1_name)
+
+
+def gen_CAP(h, acc_fn, with_oracle=False) -> [str, ClassifierAccuracyPrediction]:
+    ### CAP methods ###
+    # yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
+    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
+    # yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
+    # yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
+    # yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
+    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+
+    ### baselines ###
+    yield "ATC-MC", ATC(h, acc_fn, scoring_fn="maxconf")
+    # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
+    yield "DoC", DoC(h, acc_fn, sample_size=qp.environ["SAMPLE_SIZE"])
+
+
+def gen_CAP_cont_table(h) -> [str, CAPContingencyTable]:
+    acc_fn = None
+    yield "Naive", NaiveCAP(h, acc_fn)
+    yield "CT-PPS-EMQ", ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
+    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
+    # yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
+    # yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
+    # yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
+    # yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
+    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
+    # yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
+    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
+    # yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
+
+
+def get_method_names():
+    mock_h = LogisticRegression()
+    return [m for m, _ in gen_CAP(mock_h, None)] + [
+        m for m, _ in gen_CAP_cont_table(mock_h)
+    ]
+
+
+def gen_acc_measure():
+    yield "vanilla_accuracy", vanilla_acc_fn
+    yield "macro-F1", macrof1_fn
+
+
+def any_missing(basedir, cls_name, dataset_name, method_name):
+    for acc_name, _ in gen_acc_measure():
+        if not os.path.exists(
+            getpath(basedir, cls_name, acc_name, dataset_name, method_name)
+        ):
+            return True
+    return False
diff --git a/quacc/experiments/report.py b/quacc/experiments/report.py
new file mode 100644
index 0000000..e81739d
--- /dev/null
+++ b/quacc/experiments/report.py
@@ -0,0 +1,101 @@
+import os
+
+from quacc.experiments.util import getpath
+from quacc.utils.commons import load_json_file, save_json_file
+
+
+class TestReport:
+    def __init__(
+        self,
+        cls_name,
+        acc_name,
+        dataset_name,
+        method_name,
+    ):
+        self.cls_name = cls_name
+        self.acc_name = acc_name
+        self.dataset_name = dataset_name
+        self.method_name = method_name
+
+    def path(self, basedir):
+        return getpath(
+            basedir, self.cls_name, self.acc_name, self.dataset_name, self.method_name
+        )
+
+    def add_result(self, true_accs, estim_accs, t_train, t_test_ave, test_prevs=None):
+        self.test_prevs = test_prevs
+        self.true_accs = true_accs
+        self.estim_accs = estim_accs
+        self.t_train = t_train
+        self.t_test_ave = t_test_ave
+        return self
+
+    def save_json(self, basedir):
+        if not all([hasattr(self, _attr) for _attr in ["true_accs", "estim_accs"]]):
+            raise AttributeError("Incomplete report cannot be dumped")
+
+        result = {
+            "cls_name": self.cls_name,
+            "acc_name": self.acc_name,
+            "dataset_name": self.dataset_name,
+            "method_name": self.method_name,
+            "t_train": self.t_train,
+            "t_test_ave": self.t_test_ave,
+            "true_accs": self.true_accs,
+            "estim_accs": self.estim_accs,
+        }
+
+        result_path = self.path(basedir)
+        save_json_file(result_path, result)
+
+    @classmethod
+    def load_json(cls, path) -> "TestReport":
+        def _test_report_hook(_dict):
+            return TestReport(
+                cls_name=_dict["cls_name"],
+                acc_name=_dict["acc_name"],
+                dataset_name=_dict["dataset_name"],
+                method_name=_dict["method_name"],
+            ).add_result(
+                true_accs=_dict["true_accs"],
+                estim_accs=_dict["estim_accs"],
+                t_train=_dict["t_train"],
+                t_test_ave=_dict["t_test_ave"],
+            )
+
+        return load_json_file(path, object_hook=_test_report_hook)
+
+
+class Report:
+    def __init__(self, tests: list[TestReport]):
+        self.tests = tests
+
+    @classmethod
+    def load_tests(cls, path):
+        if not os.path.isdir(path):
+            raise ValueError("Cannot load test results: invalid directory")
+
+        _tests = []
+        for f in os.listdir(path):
+            if f.endswith(".json"):
+                _tests.append(TestReport.load_json(os.path.join(path, f)))
+
+        return Report(_tests)
+
+    def _filter_by_dataset(self):
+        pass
+
+    def _filter_by_acc(self):
+        pass
+
+    def _filter_by_methods(self):
+        pass
+
+    def train_table(self):
+        pass
+
+    def test_table(self):
+        pass
+
+    def shift_table(self):
+        pass
diff --git a/quacc/experiments/run.py b/quacc/experiments/run.py
new file mode 100644
index 0000000..d9ba1d0
--- /dev/null
+++ b/quacc/experiments/run.py
@@ -0,0 +1,124 @@
+import itertools
+import os
+
+import quapy as qp
+from ClassifierAccuracy.util.plotting import plot_diagonal
+from quapy.protocol import UPP
+
+from quacc.dataset import save_dataset_stats
+from quacc.experiments.generators import (
+    any_missing,
+    gen_acc_measure,
+    gen_bin_datasets,
+    gen_CAP,
+    gen_CAP_cont_table,
+    gen_classifiers,
+    gen_multi_datasets,
+    gen_tweet_datasets,
+)
+from quacc.experiments.report import TestReport
+from quacc.experiments.util import (
+    fit_method,
+    predictionsCAP,
+    predictionsCAPcont_table,
+    prevs_from_prot,
+    true_acc,
+)
+
+PROBLEM = "binary"
+ORACLE = False
+basedir = PROBLEM + ("-oracle" if ORACLE else "")
+
+
+if PROBLEM == "binary":
+    qp.environ["SAMPLE_SIZE"] = 1000
+    NUM_TEST = 1000
+    gen_datasets = gen_bin_datasets
+elif PROBLEM == "multiclass":
+    qp.environ["SAMPLE_SIZE"] = 250
+    NUM_TEST = 1000
+    gen_datasets = gen_multi_datasets
+elif PROBLEM == "tweet":
+    qp.environ["SAMPLE_SIZE"] = 100
+    NUM_TEST = 1000
+    gen_datasets = gen_tweet_datasets
+
+
+for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(
+    gen_classifiers(), gen_datasets()
+):
+    print(f"training {cls_name} in {dataset_name}")
+    h.fit(*L.Xy)
+
+    # test generation protocol
+    test_prot = UPP(
+        U, repeats=NUM_TEST, return_type="labelled_collection", random_state=0
+    )
+
+    # compute some stats of the dataset
+    save_dataset_stats(f"dataset_stats/{dataset_name}.json", test_prot, L, V)
+
+    # precompute the actual accuracy values
+    true_accs = {}
+    for acc_name, acc_fn in gen_acc_measure():
+        true_accs[acc_name] = [true_acc(h, acc_fn, Ui) for Ui in test_prot()]
+
+    # instances of ClassifierAccuracyPrediction are bound to the evaluation measure, so they
+    # must be nested in the acc-for
+    for acc_name, acc_fn in gen_acc_measure():
+        print(f"\tfor measure {acc_name}")
+        for method_name, method in gen_CAP(h, acc_fn, with_oracle=ORACLE):
+            report = TestReport(cls_name, acc_name, dataset_name, method_name)
+            if os.path.exists(report.path(basedir)):
+                print(f"\t\t{method_name}-{acc_name} exists, skipping")
+                continue
+
+            print(f"\t\t{method_name} computing...")
+            method, t_train = fit_method(method, V)
+            estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
+            test_prevs = prevs_from_prot(test_prot)
+            report.add_result(
+                test_prevs=test_prevs,
+                true_accs=true_accs[acc_name],
+                estim_accs=estim_accs,
+                t_train=t_train,
+                t_test_ave=t_test_ave,
+            ).save_json(basedir)
+
+    # instances of CAPContingencyTable instead are generic, and the evaluation measure can
+    # be nested to the predictions to speed up things
+    for method_name, method in gen_CAP_cont_table(h):
+        if not any_missing(basedir, cls_name, dataset_name, method_name):
+            print(
+                f"\t\tmethod {method_name} has all results already computed. Skipping."
+            )
+            continue
+
+        print(f"\t\tmethod {method_name} computing...")
+
+        method, t_train = fit_method(method, V)
+        estim_accs_dict, t_test_ave = predictionsCAPcont_table(
+            method, test_prot, gen_acc_measure, ORACLE
+        )
+        for acc_name in estim_accs_dict.keys():
+            report = TestReport(cls_name, acc_name, dataset_name, method_name)
+            report.add_result(
+                true_accs=true_accs[acc_name],
+                estim_accs=estim_accs_dict[acc_name],
+                t_train=t_train,
+                t_test_ave=t_test_ave,
+            ).save_json(basedir)
+
+    print()
+
+# generate diagonal plots
+print("generating plots")
+for (cls_name, _), (acc_name, _) in itertools.product(
+    gen_classifiers(), gen_acc_measure()
+):
+    plot_diagonal(basedir, cls_name, acc_name)
+    for dataset_name, _ in gen_datasets(only_names=True):
+        plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)
+
+print("generating tables")
+# gen_tables(basedir, datasets=[d for d, _ in gen_datasets(only_names=True)])
diff --git a/quacc/experiments/util.py b/quacc/experiments/util.py
new file mode 100644
index 0000000..d2af476
--- /dev/null
+++ b/quacc/experiments/util.py
@@ -0,0 +1,62 @@
+import os
+from time import time
+
+from quapy.data.base import LabelledCollection
+from sklearn.base import BaseEstimator
+from sklearn.metrics import confusion_matrix
+
+
+def getpath(basedir, cls_name, acc_name, dataset_name, method_name):
+    return f"results/{basedir}/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"
+
+
+def fit_method(method, V):
+    tinit = time()
+    method.fit(V)
+    t_train = time() - tinit
+    return method, t_train
+
+
+def predictionsCAP(method, test_prot, oracle=False):
+    tinit = time()
+    if not oracle:
+        estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
+    else:
+        estim_accs = [
+            method.predict(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
+        ]
+    t_test_ave = (time() - tinit) / test_prot.total()
+    return estim_accs, t_test_ave
+
+
+def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
+    estim_accs_dict = {}
+    tinit = time()
+    if not oracle:
+        estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
+    else:
+        estim_tables = [
+            method.predict_ct(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
+        ]
+    for acc_name, acc_fn in gen_acc_measure():
+        estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
+    t_test_ave = (time() - tinit) / test_prot.total()
+    return estim_accs_dict, t_test_ave
+
+
+def prevs_from_prot(prot):
+    return [Ui.prevalence() for Ui in prot()]
+
+
+def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
+    y_pred = h.predict(U.X)
+    y_true = U.y
+    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
+    return acc_fn(conf_table)
+
+
+def get_acc_name(acc_name):
+    return {
+        "Vanilla Accuracy": "vanilla_accuracy",
+        "Macro F1": "macro-F1",
+    }[acc_name]
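
Note: a minimal sketch of how the results written by run.py might be read back through the new Report/TestReport API. The results directory below is illustrative (it follows the layout produced by getpath for PROBLEM="binary"), and it assumes that quacc.utils.commons.load_json_file forwards object_hook to json.load so that each file deserializes into a TestReport.

import numpy as np

from quacc.experiments.report import Report

# leaf directory produced by run.py via getpath(): results/<basedir>/<cls>/<acc>/<dataset>/
# (hypothetical path; adjust to the experiment actually run)
results_dir = "results/binary/LR/vanilla_accuracy/imdb"

report = Report.load_tests(results_dir)
for test in report.tests:
    # mean absolute error between true and estimated accuracies over the test samples
    mae = np.mean(np.abs(np.asarray(test.true_accs) - np.asarray(test.estim_accs)))
    print(f"{test.method_name}: MAE={mae:.4f} (train: {test.t_train:.2f}s)")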