From 9bc120830979dcea8ead65e345bffd65f8533158 Mon Sep 17 00:00:00 2001
From: Lorenzo Volpi
Date: Thu, 4 Apr 2024 17:02:25 +0200
Subject: [PATCH] experiments created, report refactoring started

---
 quacc/experiments/generators.py | 131 ++++++++++++++++++++++++++++++++
 quacc/experiments/report.py     | 101 ++++++++++++++++++++++++
 quacc/experiments/run.py        | 124 ++++++++++++++++++++++++++++++
 quacc/experiments/util.py       |  62 +++++++++++++++
 4 files changed, 418 insertions(+)
 create mode 100644 quacc/experiments/generators.py
 create mode 100644 quacc/experiments/report.py
 create mode 100644 quacc/experiments/run.py
 create mode 100644 quacc/experiments/util.py

diff --git a/quacc/experiments/generators.py b/quacc/experiments/generators.py
new file mode 100644
index 0000000..17f550f
--- /dev/null
+++ b/quacc/experiments/generators.py
@@ -0,0 +1,131 @@
+import os
+
+import numpy as np
+import quapy as qp
+from quapy.data.base import LabelledCollection
+from quapy.data.datasets import (
+    TWITTER_SENTIMENT_DATASETS_TEST,
+    UCI_MULTICLASS_DATASETS,
+)
+from quapy.method.aggregative import EMQ
+from sklearn.linear_model import LogisticRegression
+
+from quacc.dataset import DatasetProvider as DP
+from quacc.error import macrof1_fn, vanilla_acc_fn
+from quacc.experiments.util import getpath
+from quacc.models.base import ClassifierAccuracyPrediction
+from quacc.models.baselines import ATC, DoC
+from quacc.models.cont_table import CAPContingencyTable, ContTableTransferCAP, NaiveCAP
+
+
+def gen_classifiers():
+    param_grid = {"C": np.logspace(-4, 4, 9), "class_weight": ["balanced", None]}
+
+    yield "LR", LogisticRegression()
+    # yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
+    # yield 'NB', GaussianNB()
+    # yield 'SVM(rbf)', SVC()
+    # yield 'SVM(linear)', LinearSVC()
+
+
+def gen_multi_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    for dataset_name in np.setdiff1d(UCI_MULTICLASS_DATASETS, ["wine-quality"]):
+        if only_names:
+            yield dataset_name, None
+        else:
+            yield dataset_name, DP.uci_multiclass(dataset_name)
+
+    # yields the 20 newsgroups dataset
+    if only_names:
+        yield "20news", None
+    else:
+        yield "20news", DP.news20()
+
+    # yields the T1B@LeQua2022 (training) dataset
+    if only_names:
+        yield "T1B-LeQua2022", None
+    else:
+        yield "T1B-LeQua2022", DP.t1b_lequa2022()
+
+
+def gen_tweet_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
+        if only_names:
+            yield dataset_name, None
+        else:
+            yield dataset_name, DP.twitter(dataset_name)
+
+
+def gen_bin_datasets(
+    only_names=False,
+) -> [str, [LabelledCollection, LabelledCollection, LabelledCollection]]:
+    if only_names:
+        for dataset_name in ["imdb", "CCAT", "GCAT", "MCAT"]:
+            yield dataset_name, None
+    else:
+        yield "imdb", DP.imdb()
+        for rcv1_name in [
+            "CCAT",
+            "GCAT",
+            "MCAT",
+        ]:
+            yield rcv1_name, DP.rcv1(rcv1_name)
+
+
+def gen_CAP(h, acc_fn, with_oracle=False) -> [str, ClassifierAccuracyPrediction]:
+    ### CAP methods ###
+    # yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
+    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
+    # yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
+    # yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
+    # yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
+    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+
+    ### baselines ###
+    yield "ATC-MC", ATC(h, acc_fn, scoring_fn="maxconf")
+    # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
+    yield "DoC", DoC(h, acc_fn, sample_size=qp.environ["SAMPLE_SIZE"])
+
+
+def gen_CAP_cont_table(h) -> [str, CAPContingencyTable]:
+    acc_fn = None
+    yield "Naive", NaiveCAP(h, acc_fn)
+    yield "CT-PPS-EMQ", ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
+    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
+    # yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
+    # yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
+    # yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
+    # yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
+    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
+    # yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
+    # yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
+    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
+    # yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
+
+
+def get_method_names():
+    mock_h = LogisticRegression()
+    return [m for m, _ in gen_CAP(mock_h, None)] + [
+        m for m, _ in gen_CAP_cont_table(mock_h)
+    ]
+
+
+def gen_acc_measure():
+    yield "vanilla_accuracy", vanilla_acc_fn
+    yield "macro-F1", macrof1_fn
+
+
+def any_missing(basedir, cls_name, dataset_name, method_name):
+    for acc_name, _ in gen_acc_measure():
+        if not os.path.exists(
+            getpath(basedir, cls_name, acc_name, dataset_name, method_name)
+        ):
+            return True
+    return False
diff --git a/quacc/experiments/report.py b/quacc/experiments/report.py
new file mode 100644
index 0000000..e81739d
--- /dev/null
+++ b/quacc/experiments/report.py
@@ -0,0 +1,101 @@
+import os
+
+from quacc.experiments.util import getpath
+from quacc.utils.commons import load_json_file, save_json_file
+
+
+class TestReport:
+    def __init__(
+        self,
+        cls_name,
+        acc_name,
+        dataset_name,
+        method_name,
+    ):
+        self.cls_name = cls_name
+        self.acc_name = acc_name
+        self.dataset_name = dataset_name
+        self.method_name = method_name
+
+    def path(self, basedir):
+        return getpath(
+            basedir, self.cls_name, self.acc_name, self.dataset_name, self.method_name
+        )
+
+    def add_result(self, true_accs, estim_accs, t_train, t_test_ave, test_prevs=None):
+        self.test_prevs = test_prevs
+        self.true_accs = true_accs
+        self.estim_accs = estim_accs
+        self.t_train = t_train
+        self.t_test_ave = t_test_ave
+        return self
+
+    def save_json(self, basedir):
+        if not all([hasattr(self, _attr) for _attr in ["true_accs", "estim_accs"]]):
+            raise AttributeError("Incomplete report cannot be dumped")
+
+        result = {
+            "cls_name": self.cls_name,
+            "acc_name": self.acc_name,
+            "dataset_name": self.dataset_name,
+            "method_name": self.method_name,
+            "t_train": self.t_train,
+            "t_test_ave": self.t_test_ave,
+            "true_accs": self.true_accs,
+            "estim_accs": self.estim_accs,
+        }
+
+        result_path = self.path(basedir)
+        save_json_file(result_path, result)
+
+    @classmethod
+    def load_json(cls, path) -> "TestReport":
+        def _test_report_hook(_dict):
+            return TestReport(
+                cls_name=_dict["cls_name"],
+                acc_name=_dict["acc_name"],
+                dataset_name=_dict["dataset_name"],
+                method_name=_dict["method_name"],
+            ).add_result(
+                true_accs=_dict["true_accs"],
+                estim_accs=_dict["estim_accs"],
+                t_train=_dict["t_train"],
+                t_test_ave=_dict["t_test_ave"],
+            )
+
+        return load_json_file(path, object_hook=_test_report_hook)
+
+
+class Report:
+    def __init__(self, tests: list[TestReport]):
+        self.tests = tests
+
+    @classmethod
+    def load_tests(cls, path):
+        if not os.path.isdir(path):
+            raise ValueError("Cannot load test results: invalid directory")
+
+        _tests = []
+        for f in os.listdir(path):
+            if f.endswith(".json"):
+                _tests.append(TestReport.load_json(os.path.join(path, f)))
+
+        return Report(_tests)
+
+    def _filter_by_dataset(self):
+        pass
+
+    def _filter_by_acc(self):
+        pass
+
+    def _filter_by_methods(self):
+        pass
+
+    def train_table(self):
+        pass
+
+    def test_table(self):
+        pass
+
+    def shift_table(self):
+        pass
diff --git a/quacc/experiments/run.py b/quacc/experiments/run.py
new file mode 100644
index 0000000..d9ba1d0
--- /dev/null
+++ b/quacc/experiments/run.py
@@ -0,0 +1,124 @@
+import itertools
+import os
+
+import quapy as qp
+from ClassifierAccuracy.util.plotting import plot_diagonal
+from quapy.protocol import UPP
+
+from quacc.dataset import save_dataset_stats
+from quacc.experiments.generators import (
+    any_missing,
+    gen_acc_measure,
+    gen_bin_datasets,
+    gen_CAP,
+    gen_CAP_cont_table,
+    gen_classifiers,
+    gen_multi_datasets,
+    gen_tweet_datasets,
+)
+from quacc.experiments.report import TestReport
+from quacc.experiments.util import (
+    fit_method,
+    predictionsCAP,
+    predictionsCAPcont_table,
+    prevs_from_prot,
+    true_acc,
+)
+
+PROBLEM = "binary"
+ORACLE = False
+basedir = PROBLEM + ("-oracle" if ORACLE else "")
+
+
+if PROBLEM == "binary":
+    qp.environ["SAMPLE_SIZE"] = 1000
+    NUM_TEST = 1000
+    gen_datasets = gen_bin_datasets
+elif PROBLEM == "multiclass":
+    qp.environ["SAMPLE_SIZE"] = 250
+    NUM_TEST = 1000
+    gen_datasets = gen_multi_datasets
+elif PROBLEM == "tweet":
+    qp.environ["SAMPLE_SIZE"] = 100
+    NUM_TEST = 1000
+    gen_datasets = gen_tweet_datasets
+
+
+for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(
+    gen_classifiers(), gen_datasets()
+):
+    print(f"training {cls_name} in {dataset_name}")
+    h.fit(*L.Xy)
+
+    # test generation protocol
+    test_prot = UPP(
+        U, repeats=NUM_TEST, return_type="labelled_collection", random_state=0
+    )
+
+    # compute some stats of the dataset
+    save_dataset_stats(f"dataset_stats/{dataset_name}.json", test_prot, L, V)
+
+    # precompute the actual accuracy values
+    true_accs = {}
+    for acc_name, acc_fn in gen_acc_measure():
+        true_accs[acc_name] = [true_acc(h, acc_fn, Ui) for Ui in test_prot()]
+
+    # instances of ClassifierAccuracyPrediction are bound to the evaluation measure, so they
+    # must be nested in the acc-for
+    for acc_name, acc_fn in gen_acc_measure():
+        print(f"\tfor measure {acc_name}")
+        for method_name, method in gen_CAP(h, acc_fn, with_oracle=ORACLE):
+            report = TestReport(cls_name, acc_name, dataset_name, method_name)
+            if os.path.exists(report.path(basedir)):
+                print(f"\t\t{method_name}-{acc_name} exists, skipping")
+                continue
+
+            print(f"\t\t{method_name} computing...")
+            method, t_train = fit_method(method, V)
+            estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
+            test_prevs = prevs_from_prot(test_prot)
+            report.add_result(
+                test_prevs=test_prevs,
+                true_accs=true_accs[acc_name],
+                estim_accs=estim_accs,
+                t_train=t_train,
+                t_test_ave=t_test_ave,
+            ).save_json(basedir)
+
+    # instances of CAPContingencyTable instead are generic, and the evaluation measure can
+    # be nested to the predictions to speed up things
+    for method_name, method in gen_CAP_cont_table(h):
+        if not any_missing(basedir, cls_name, dataset_name, method_name):
+            print(
+                f"\t\tmethod {method_name} has all results already computed. Skipping."
+            )
+            continue
+
+        print(f"\t\tmethod {method_name} computing...")
+
+        method, t_train = fit_method(method, V)
+        estim_accs_dict, t_test_ave = predictionsCAPcont_table(
+            method, test_prot, gen_acc_measure, ORACLE
+        )
+        for acc_name in estim_accs_dict.keys():
+            report = TestReport(cls_name, acc_name, dataset_name, method_name)
+            report.add_result(
+                true_accs=true_accs[acc_name],
+                estim_accs=estim_accs_dict[acc_name],
+                t_train=t_train,
+                t_test_ave=t_test_ave,
+            ).save_json(basedir)
+
+    print()
+
+# generate diagonal plots
+print("generating plots")
+for (cls_name, _), (acc_name, _) in itertools.product(
+    gen_classifiers(), gen_acc_measure()
+):
+    plot_diagonal(basedir, cls_name, acc_name)
+    for dataset_name, _ in gen_datasets(only_names=True):
+        plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)
+
+print("generating tables")
+# gen_tables(basedir, datasets=[d for d, _ in gen_datasets(only_names=True)])
diff --git a/quacc/experiments/util.py b/quacc/experiments/util.py
new file mode 100644
index 0000000..d2af476
--- /dev/null
+++ b/quacc/experiments/util.py
@@ -0,0 +1,62 @@
+import os
+from time import time
+
+from quapy.data.base import LabelledCollection
+from sklearn.base import BaseEstimator
+from sklearn.metrics import confusion_matrix
+
+
+def getpath(basedir, cls_name, acc_name, dataset_name, method_name):
+    return f"results/{basedir}/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"
+
+
+def fit_method(method, V):
+    tinit = time()
+    method.fit(V)
+    t_train = time() - tinit
+    return method, t_train
+
+
+def predictionsCAP(method, test_prot, oracle=False):
+    tinit = time()
+    if not oracle:
+        estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
+    else:
+        estim_accs = [
+            method.predict(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
+        ]
+    t_test_ave = (time() - tinit) / test_prot.total()
+    return estim_accs, t_test_ave
+
+
+def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
+    estim_accs_dict = {}
+    tinit = time()
+    if not oracle:
+        estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
+    else:
+        estim_tables = [
+            method.predict_ct(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
+        ]
+    for acc_name, acc_fn in gen_acc_measure():
+        estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
+    t_test_ave = (time() - tinit) / test_prot.total()
+    return estim_accs_dict, t_test_ave
+
+
+def prevs_from_prot(prot):
+    return [Ui.prevalence() for Ui in prot()]
+
+
+def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
+    y_pred = h.predict(U.X)
+    y_true = U.y
+    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
+    return acc_fn(conf_table)
+
+
+def get_acc_name(acc_name):
+    return {
+        "Vanilla Accuracy": "vanilla_accuracy",
+        "Macro F1": "macro-F1",
+    }[acc_name]
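
Note: a minimal sketch of how the results written by run.py might be read back through the new Report/TestReport API. The results directory below is illustrative (it follows the layout produced by getpath for PROBLEM="binary"), and it assumes that quacc.utils.commons.load_json_file forwards object_hook to json.load so that each file deserializes into a TestReport.

import numpy as np

from quacc.experiments.report import Report

# leaf directory produced by run.py via getpath(): results/<basedir>/<cls>/<acc>/<dataset>/
# (hypothetical path; adjust to the experiment actually run)
results_dir = "results/binary/LR/vanilla_accuracy/imdb"

report = Report.load_tests(results_dir)
for test in report.tests:
    # mean absolute error between true and estimated accuracies over the test samples
    mae = np.mean(np.abs(np.asarray(test.true_accs) - np.asarray(test.estim_accs)))
    print(f"{test.method_name}: MAE={mae:.4f} (train: {test.t_train:.2f}s)")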