experiments created, report refactoring started
This commit is contained in:
parent 51867f3e9c
commit 9bc1208309
quacc/experiments/generators.py
@@ -0,0 +1,131 @@
import os
from collections.abc import Iterator

import numpy as np
import quapy as qp
from quapy.data.base import LabelledCollection
from quapy.data.datasets import (
    TWITTER_SENTIMENT_DATASETS_TEST,
    UCI_MULTICLASS_DATASETS,
)
from quapy.method.aggregative import EMQ
from sklearn.linear_model import LogisticRegression

from quacc.dataset import DatasetProvider as DP
from quacc.error import macrof1_fn, vanilla_acc_fn
from quacc.experiments.util import getpath
from quacc.models.base import ClassifierAccuracyPrediction
from quacc.models.baselines import ATC, DoC
from quacc.models.cont_table import CAPContingencyTable, ContTableTransferCAP, NaiveCAP

# datasets are yielded as (name, (train, validation, test)) pairs
TrainValTest = tuple[LabelledCollection, LabelledCollection, LabelledCollection]


def gen_classifiers():
    # the grid spans C in [1e-4, 1e4]; it is only used by the (commented-out)
    # grid-search variant below
    param_grid = {"C": np.logspace(-4, 4, 9), "class_weight": ["balanced", None]}

    yield "LR", LogisticRegression()
    # yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
    # yield 'NB', GaussianNB()
    # yield 'SVM(rbf)', SVC()
    # yield 'SVM(linear)', LinearSVC()
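
# The driver script (later in this commit) consumes these generators as
# (name, object) pairs, e.g. itertools.product(gen_classifiers(),
# gen_datasets()) pairs every classifier with every dataset.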


def gen_multi_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    for dataset_name in np.setdiff1d(UCI_MULTICLASS_DATASETS, ["wine-quality"]):
        if only_names:
            yield dataset_name, None
        else:
            yield dataset_name, DP.uci_multiclass(dataset_name)

    # yields the 20 newsgroups dataset
    if only_names:
        yield "20news", None
    else:
        yield "20news", DP.news20()

    # yields the T1B@LeQua2022 (training) dataset
    if only_names:
        yield "T1B-LeQua2022", None
    else:
        yield "T1B-LeQua2022", DP.t1b_lequa2022()


def gen_tweet_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
        if only_names:
            yield dataset_name, None
        else:
            yield dataset_name, DP.twitter(dataset_name)


def gen_bin_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    if only_names:
        for dataset_name in ["imdb", "CCAT", "GCAT", "MCAT"]:
            yield dataset_name, None
    else:
        yield "imdb", DP.imdb()
        for rcv1_name in ["CCAT", "GCAT", "MCAT"]:
            yield rcv1_name, DP.rcv1(rcv1_name)
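
# Usage sketch (illustrative): each dataset generator yields (name, splits)
# pairs, where splits is a (train, validation, test) triple of
# LabelledCollection objects, or None when only_names=True, e.g.
#
#   for dataset_name, _ in gen_bin_datasets(only_names=True):
#       print(dataset_name)  # imdb, CCAT, GCAT, MCAT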


def gen_CAP(
    h, acc_fn, with_oracle=False
) -> Iterator[tuple[str, ClassifierAccuracyPrediction]]:
    ### CAP methods ###
    # yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
    # yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
    # yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
    # yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')

    ### baselines ###
    yield "ATC-MC", ATC(h, acc_fn, scoring_fn="maxconf")
    # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
    yield "DoC", DoC(h, acc_fn, sample_size=qp.environ["SAMPLE_SIZE"])


def gen_CAP_cont_table(h) -> Iterator[tuple[str, CAPContingencyTable]]:
    acc_fn = None
    yield "Naive", NaiveCAP(h, acc_fn)
    yield "CT-PPS-EMQ", ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
    # yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
    # yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
    # yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
    # yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
    # yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
    # yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
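
# Note: acc_fn is None here because CAPContingencyTable methods predict the
# full contingency table, from which any accuracy measure can be computed a
# posteriori (see gen_acc_measure and the driver script). Consumption sketch,
# assuming the predict_ct API shown in quacc.experiments.util:
#
#   for name, method in gen_CAP_cont_table(h):
#       method.fit(V)
#       cont_table = method.predict_ct(test_sample.X)
#       acc = vanilla_acc_fn(cont_table)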


def get_method_names():
    mock_h = LogisticRegression()
    return [m for m, _ in gen_CAP(mock_h, None)] + [
        m for m, _ in gen_CAP_cont_table(mock_h)
    ]


def gen_acc_measure():
    yield "vanilla_accuracy", vanilla_acc_fn
    yield "macro-F1", macrof1_fn


def any_missing(basedir, cls_name, dataset_name, method_name):
    for acc_name, _ in gen_acc_measure():
        if not os.path.exists(
            getpath(basedir, cls_name, acc_name, dataset_name, method_name)
        ):
            return True
    return False
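

# Illustrative smoke test (an assumption, not part of the module API):
# enumerate the active method names without touching any dataset.
if __name__ == "__main__":
    print(get_method_names())  # -> ['ATC-MC', 'DoC', 'Naive', 'CT-PPS-EMQ']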

quacc/experiments/report.py
@@ -0,0 +1,101 @@
import os

from quacc.experiments.util import getpath
from quacc.utils.commons import load_json_file, save_json_file


class TestReport:
    def __init__(
        self,
        cls_name,
        acc_name,
        dataset_name,
        method_name,
    ):
        self.cls_name = cls_name
        self.acc_name = acc_name
        self.dataset_name = dataset_name
        self.method_name = method_name

    def path(self, basedir):
        return getpath(
            basedir, self.cls_name, self.acc_name, self.dataset_name, self.method_name
        )

    def add_result(self, test_prevs, true_accs, estim_accs, t_train, t_test_ave):
        self.test_prevs = test_prevs
        self.true_accs = true_accs
        self.estim_accs = estim_accs
        self.t_train = t_train
        self.t_test_ave = t_test_ave
        return self

    def save_json(self, basedir):
        # refuse to dump a report for which add_result has not been called yet
        _required = ["test_prevs", "true_accs", "estim_accs", "t_train", "t_test_ave"]
        if not all(hasattr(self, _attr) for _attr in _required):
            raise AttributeError("Incomplete report cannot be dumped")

        result = {
            "cls_name": self.cls_name,
            "acc_name": self.acc_name,
            "dataset_name": self.dataset_name,
            "method_name": self.method_name,
            "test_prevs": self.test_prevs,
            "t_train": self.t_train,
            "t_test_ave": self.t_test_ave,
            "true_accs": self.true_accs,
            "estim_accs": self.estim_accs,
        }

        result_path = self.path(basedir)
        save_json_file(result_path, result)

    @classmethod
    def load_json(cls, path) -> "TestReport":
        def _test_report_hook(_dict):
            return TestReport(
                cls_name=_dict["cls_name"],
                acc_name=_dict["acc_name"],
                dataset_name=_dict["dataset_name"],
                method_name=_dict["method_name"],
            ).add_result(
                test_prevs=_dict["test_prevs"],
                true_accs=_dict["true_accs"],
                estim_accs=_dict["estim_accs"],
                t_train=_dict["t_train"],
                t_test_ave=_dict["t_test_ave"],
            )

        return load_json_file(path, object_hook=_test_report_hook)
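
# Round-trip sketch (illustrative): a report is keyed by (classifier, accuracy
# measure, dataset, method) and serialized under results/<basedir>/..., so
# that, given the values computed by the driver script,
#
#   report = TestReport("LR", "vanilla_accuracy", "imdb", "DoC")
#   report.add_result(test_prevs, true_accs, estim_accs, t_train, t_test_ave)
#   report.save_json("binary")
#   same = TestReport.load_json(report.path("binary"))
#
# recovers an equivalent object via the object hook above.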


class Report:
    def __init__(self, tests: list[TestReport]):
        self.tests = tests

    @classmethod
    def load_tests(cls, path):
        if not os.path.isdir(path):
            raise ValueError("Cannot load test results: invalid directory")

        _tests = []
        for f in os.listdir(path):
            if f.endswith(".json"):
                # os.listdir returns bare filenames: join them with the parent dir
                _tests.append(TestReport.load_json(os.path.join(path, f)))

        return Report(_tests)

    def _filter_by_dataset(self):
        pass

    def _filter_by_acc(self):
        pass

    def _filter_by_methods(self):
        pass

    def train_table(self):
        pass

    def test_table(self):
        pass

    def shift_table(self):
        pass

@@ -0,0 +1,124 @@
import itertools
import os

import quapy as qp
from ClassifierAccuracy.util.plotting import plot_diagonal
from quapy.protocol import UPP

from quacc.dataset import save_dataset_stats
from quacc.experiments.generators import (
    any_missing,
    gen_acc_measure,
    gen_bin_datasets,
    gen_CAP,
    gen_CAP_cont_table,
    gen_classifiers,
    gen_multi_datasets,
    gen_tweet_datasets,
)
from quacc.experiments.report import TestReport
from quacc.experiments.util import (
    fit_method,
    predictionsCAP,
    predictionsCAPcont_table,
    prevs_from_prot,
    true_acc,
)

PROBLEM = "binary"
ORACLE = False
basedir = PROBLEM + ("-oracle" if ORACLE else "")


if PROBLEM == "binary":
    qp.environ["SAMPLE_SIZE"] = 1000
    NUM_TEST = 1000
    gen_datasets = gen_bin_datasets
elif PROBLEM == "multiclass":
    qp.environ["SAMPLE_SIZE"] = 250
    NUM_TEST = 1000
    gen_datasets = gen_multi_datasets
elif PROBLEM == "tweet":
    qp.environ["SAMPLE_SIZE"] = 100
    NUM_TEST = 1000
    gen_datasets = gen_tweet_datasets
else:
    # fail fast: otherwise gen_datasets would be undefined further down
    raise ValueError(f"unknown problem: {PROBLEM}")

for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(
    gen_classifiers(), gen_datasets()
):
    print(f"training {cls_name} on {dataset_name}")
    h.fit(*L.Xy)

    # test generation protocol
    test_prot = UPP(
        U, repeats=NUM_TEST, return_type="labelled_collection", random_state=0
    )
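    # UPP (uniform-prevalence protocol) draws NUM_TEST samples from U, each of
    # qp.environ["SAMPLE_SIZE"] instances, with class prevalences sampled
    # (approximately) uniformly from the probability simplex, so that methods
    # are evaluated under a wide range of prior-probability shifts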

    # compute some stats of the dataset
    save_dataset_stats(f"dataset_stats/{dataset_name}.json", test_prot, L, V)

    # precompute the actual accuracy values
    true_accs = {}
    for acc_name, acc_fn in gen_acc_measure():
        true_accs[acc_name] = [true_acc(h, acc_fn, Ui) for Ui in test_prot()]

    # instances of ClassifierAccuracyPrediction are bound to a specific
    # evaluation measure, so they must be nested inside the accuracy loop
    for acc_name, acc_fn in gen_acc_measure():
        print(f"\tfor measure {acc_name}")
        for method_name, method in gen_CAP(h, acc_fn, with_oracle=ORACLE):
            report = TestReport(cls_name, acc_name, dataset_name, method_name)
            if os.path.exists(report.path(basedir)):
                print(f"\t\t{method_name}-{acc_name} exists, skipping")
                continue

            print(f"\t\t{method_name} computing...")
            method, t_train = fit_method(method, V)
            estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
            test_prevs = prevs_from_prot(test_prot)
            report.add_result(
                test_prevs=test_prevs,
                true_accs=true_accs[acc_name],
                estim_accs=estim_accs,
                t_train=t_train,
                t_test_ave=t_test_ave,
            ).save_json(basedir)

    # instances of CAPContingencyTable are instead generic: the contingency
    # tables are predicted once and every evaluation measure is then computed
    # from them, which speeds things up
    for method_name, method in gen_CAP_cont_table(h):
        if not any_missing(basedir, cls_name, dataset_name, method_name):
            print(
                f"\t\tmethod {method_name} has all results already computed. Skipping."
            )
            continue

        print(f"\t\tmethod {method_name} computing...")

        method, t_train = fit_method(method, V)
        estim_accs_dict, t_test_ave = predictionsCAPcont_table(
            method, test_prot, gen_acc_measure, ORACLE
        )
        for acc_name in estim_accs_dict.keys():
            report = TestReport(cls_name, acc_name, dataset_name, method_name)
            report.add_result(
                test_prevs=prevs_from_prot(test_prot),
                true_accs=true_accs[acc_name],
                estim_accs=estim_accs_dict[acc_name],
                t_train=t_train,
                t_test_ave=t_test_ave,
            ).save_json(basedir)

    print()

# generate diagonal plots
print("generating plots")
for (cls_name, _), (acc_name, _) in itertools.product(
    gen_classifiers(), gen_acc_measure()
):
    plot_diagonal(basedir, cls_name, acc_name)
    for dataset_name, _ in gen_datasets(only_names=True):
        plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)

print("generating tables")
# gen_tables(basedir, datasets=[d for d, _ in gen_datasets(only_names=True)])

quacc/experiments/util.py
@@ -0,0 +1,62 @@
from time import time

from quapy.data.base import LabelledCollection
from sklearn.base import BaseEstimator
from sklearn.metrics import confusion_matrix


def getpath(basedir, cls_name, acc_name, dataset_name, method_name):
    return f"results/{basedir}/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"
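
# For instance, with the driver's settings (PROBLEM="binary", classifier "LR",
# measure "vanilla_accuracy"), the DoC results for imdb are stored at
#   results/binary/LR/vanilla_accuracy/imdb/DoC.json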


def fit_method(method, V):
    # fit on the validation split V, timing the training
    tinit = time()
    method.fit(V)
    t_train = time() - tinit
    return method, t_train


def predictionsCAP(method, test_prot, oracle=False):
    tinit = time()
    if not oracle:
        estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
    else:
        estim_accs = [
            method.predict(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
        ]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs, t_test_ave


def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
    estim_accs_dict = {}
    tinit = time()
    if not oracle:
        estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
    else:
        estim_tables = [
            method.predict_ct(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
        ]
    for acc_name, acc_fn in gen_acc_measure():
        estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs_dict, t_test_ave


def prevs_from_prot(prot):
    return [Ui.prevalence() for Ui in prot()]


def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
    # evaluate the classifier on the labelled sample U and compute the accuracy
    # measure directly from the resulting confusion matrix
    y_pred = h.predict(U.X)
    y_true = U.y
    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
    return acc_fn(conf_table)
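
# Sketch of the acc_fn contract assumed above (quacc.error provides the actual
# implementations): each measure maps a confusion matrix C, with C[i, j] the
# number of class-i instances predicted as class j, to a scalar; e.g. vanilla
# accuracy would be
#
#   import numpy as np
#   def vanilla_acc(C: np.ndarray) -> float:
#       return C.trace() / C.sum()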


def get_acc_name(acc_name):
    # map a display name to the key used by gen_acc_measure and in result paths
    return {
        "Vanilla Accuracy": "vanilla_accuracy",
        "Macro F1": "macro-F1",
    }[acc_name]