experiments created, report refactoring started
This commit is contained in:
parent 51867f3e9c
commit 9bc1208309
quacc/experiments/generators.py
@@ -0,0 +1,131 @@
import os
from collections.abc import Iterator

import numpy as np
import quapy as qp
from quapy.data.base import LabelledCollection
from quapy.data.datasets import (
    TWITTER_SENTIMENT_DATASETS_TEST,
    UCI_MULTICLASS_DATASETS,
)
from quapy.method.aggregative import EMQ
from sklearn.linear_model import LogisticRegression

from quacc.dataset import DatasetProvider as DP
from quacc.error import macrof1_fn, vanilla_acc_fn
from quacc.experiments.util import getpath
from quacc.models.base import ClassifierAccuracyPrediction
from quacc.models.baselines import ATC, DoC
from quacc.models.cont_table import CAPContingencyTable, ContTableTransferCAP, NaiveCAP

# datasets are yielded as (name, (train, validation, test)) pairs
TrainValTest = tuple[LabelledCollection, LabelledCollection, LabelledCollection]


def gen_classifiers():
    # the grid spans C in [1e-4, 1e4]; it is only used by the (commented-out)
    # grid-search variant below
    param_grid = {"C": np.logspace(-4, 4, 9), "class_weight": ["balanced", None]}

    yield "LR", LogisticRegression()
    # yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
    # yield 'NB', GaussianNB()
    # yield 'SVM(rbf)', SVC()
    # yield 'SVM(linear)', LinearSVC()
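
# The driver script (later in this commit) consumes these generators as
# (name, object) pairs, e.g. itertools.product(gen_classifiers(),
# gen_datasets()) pairs every classifier with every dataset.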


def gen_multi_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    for dataset_name in np.setdiff1d(UCI_MULTICLASS_DATASETS, ["wine-quality"]):
        if only_names:
            yield dataset_name, None
        else:
            yield dataset_name, DP.uci_multiclass(dataset_name)

    # yields the 20 newsgroups dataset
    if only_names:
        yield "20news", None
    else:
        yield "20news", DP.news20()

    # yields the T1B@LeQua2022 (training) dataset
    if only_names:
        yield "T1B-LeQua2022", None
    else:
        yield "T1B-LeQua2022", DP.t1b_lequa2022()


def gen_tweet_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
        if only_names:
            yield dataset_name, None
        else:
            yield dataset_name, DP.twitter(dataset_name)


def gen_bin_datasets(
    only_names=False,
) -> Iterator[tuple[str, TrainValTest | None]]:
    if only_names:
        for dataset_name in ["imdb", "CCAT", "GCAT", "MCAT"]:
            yield dataset_name, None
    else:
        yield "imdb", DP.imdb()
        for rcv1_name in ["CCAT", "GCAT", "MCAT"]:
            yield rcv1_name, DP.rcv1(rcv1_name)
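
# Usage sketch (illustrative): each dataset generator yields (name, splits)
# pairs, where splits is a (train, validation, test) triple of
# LabelledCollection objects, or None when only_names=True, e.g.
#
#   for dataset_name, _ in gen_bin_datasets(only_names=True):
#       print(dataset_name)  # imdb, CCAT, GCAT, MCAT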


def gen_CAP(
    h, acc_fn, with_oracle=False
) -> Iterator[tuple[str, ClassifierAccuracyPrediction]]:
    ### CAP methods ###
    # yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
    # yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
    # yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
    # yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')

    ### baselines ###
    yield "ATC-MC", ATC(h, acc_fn, scoring_fn="maxconf")
    # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
    yield "DoC", DoC(h, acc_fn, sample_size=qp.environ["SAMPLE_SIZE"])


def gen_CAP_cont_table(h) -> Iterator[tuple[str, CAPContingencyTable]]:
    acc_fn = None
    yield "Naive", NaiveCAP(h, acc_fn)
    yield "CT-PPS-EMQ", ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
    # yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
    # yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
    # yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
    # yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
    # yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
    # yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
    # yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
    # yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
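
# Note: acc_fn is None here because CAPContingencyTable methods predict the
# full contingency table, from which any accuracy measure can be computed a
# posteriori (see gen_acc_measure and the driver script). Consumption sketch,
# assuming the predict_ct API shown in quacc.experiments.util:
#
#   for name, method in gen_CAP_cont_table(h):
#       method.fit(V)
#       cont_table = method.predict_ct(test_sample.X)
#       acc = vanilla_acc_fn(cont_table)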


def get_method_names():
    mock_h = LogisticRegression()
    return [m for m, _ in gen_CAP(mock_h, None)] + [
        m for m, _ in gen_CAP_cont_table(mock_h)
    ]


def gen_acc_measure():
    yield "vanilla_accuracy", vanilla_acc_fn
    yield "macro-F1", macrof1_fn


def any_missing(basedir, cls_name, dataset_name, method_name):
    for acc_name, _ in gen_acc_measure():
        if not os.path.exists(
            getpath(basedir, cls_name, acc_name, dataset_name, method_name)
        ):
            return True
    return False
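

# Illustrative smoke test (an assumption, not part of the module API):
# enumerate the active method names without touching any dataset.
if __name__ == "__main__":
    print(get_method_names())  # -> ['ATC-MC', 'DoC', 'Naive', 'CT-PPS-EMQ']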

quacc/experiments/report.py
@@ -0,0 +1,101 @@
import os

from quacc.experiments.util import getpath
from quacc.utils.commons import load_json_file, save_json_file


class TestReport:
    def __init__(
        self,
        cls_name,
        acc_name,
        dataset_name,
        method_name,
    ):
        self.cls_name = cls_name
        self.acc_name = acc_name
        self.dataset_name = dataset_name
        self.method_name = method_name

    def path(self, basedir):
        return getpath(
            basedir, self.cls_name, self.acc_name, self.dataset_name, self.method_name
        )

    def add_result(self, test_prevs, true_accs, estim_accs, t_train, t_test_ave):
        self.test_prevs = test_prevs
        self.true_accs = true_accs
        self.estim_accs = estim_accs
        self.t_train = t_train
        self.t_test_ave = t_test_ave
        return self

    def save_json(self, basedir):
        # refuse to dump a report for which add_result has not been called yet
        _required = ["test_prevs", "true_accs", "estim_accs", "t_train", "t_test_ave"]
        if not all(hasattr(self, _attr) for _attr in _required):
            raise AttributeError("Incomplete report cannot be dumped")

        result = {
            "cls_name": self.cls_name,
            "acc_name": self.acc_name,
            "dataset_name": self.dataset_name,
            "method_name": self.method_name,
            "test_prevs": self.test_prevs,
            "t_train": self.t_train,
            "t_test_ave": self.t_test_ave,
            "true_accs": self.true_accs,
            "estim_accs": self.estim_accs,
        }

        result_path = self.path(basedir)
        save_json_file(result_path, result)

    @classmethod
    def load_json(cls, path) -> "TestReport":
        def _test_report_hook(_dict):
            return TestReport(
                cls_name=_dict["cls_name"],
                acc_name=_dict["acc_name"],
                dataset_name=_dict["dataset_name"],
                method_name=_dict["method_name"],
            ).add_result(
                test_prevs=_dict["test_prevs"],
                true_accs=_dict["true_accs"],
                estim_accs=_dict["estim_accs"],
                t_train=_dict["t_train"],
                t_test_ave=_dict["t_test_ave"],
            )

        return load_json_file(path, object_hook=_test_report_hook)
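
# Round-trip sketch (illustrative): a report is keyed by (classifier, accuracy
# measure, dataset, method) and serialized under results/<basedir>/..., so
# that, given the values computed by the driver script,
#
#   report = TestReport("LR", "vanilla_accuracy", "imdb", "DoC")
#   report.add_result(test_prevs, true_accs, estim_accs, t_train, t_test_ave)
#   report.save_json("binary")
#   same = TestReport.load_json(report.path("binary"))
#
# recovers an equivalent object via the object hook above.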


class Report:
    def __init__(self, tests: list[TestReport]):
        self.tests = tests

    @classmethod
    def load_tests(cls, path):
        if not os.path.isdir(path):
            raise ValueError("Cannot load test results: invalid directory")

        _tests = []
        for f in os.listdir(path):
            if f.endswith(".json"):
                # os.listdir returns bare filenames: join them with the parent dir
                _tests.append(TestReport.load_json(os.path.join(path, f)))

        return Report(_tests)

    def _filter_by_dataset(self):
        pass

    def _filter_by_acc(self):
        pass

    def _filter_by_methods(self):
        pass

    def train_table(self):
        pass

    def test_table(self):
        pass

    def shift_table(self):
        pass

@@ -0,0 +1,124 @@
import itertools
import os

import quapy as qp
from ClassifierAccuracy.util.plotting import plot_diagonal
from quapy.protocol import UPP

from quacc.dataset import save_dataset_stats
from quacc.experiments.generators import (
    any_missing,
    gen_acc_measure,
    gen_bin_datasets,
    gen_CAP,
    gen_CAP_cont_table,
    gen_classifiers,
    gen_multi_datasets,
    gen_tweet_datasets,
)
from quacc.experiments.report import TestReport
from quacc.experiments.util import (
    fit_method,
    predictionsCAP,
    predictionsCAPcont_table,
    prevs_from_prot,
    true_acc,
)

PROBLEM = "binary"
ORACLE = False
basedir = PROBLEM + ("-oracle" if ORACLE else "")


if PROBLEM == "binary":
    qp.environ["SAMPLE_SIZE"] = 1000
    NUM_TEST = 1000
    gen_datasets = gen_bin_datasets
elif PROBLEM == "multiclass":
    qp.environ["SAMPLE_SIZE"] = 250
    NUM_TEST = 1000
    gen_datasets = gen_multi_datasets
elif PROBLEM == "tweet":
    qp.environ["SAMPLE_SIZE"] = 100
    NUM_TEST = 1000
    gen_datasets = gen_tweet_datasets
else:
    # fail fast: otherwise gen_datasets would be undefined further down
    raise ValueError(f"unknown problem: {PROBLEM}")

for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(
    gen_classifiers(), gen_datasets()
):
    print(f"training {cls_name} on {dataset_name}")
    h.fit(*L.Xy)

    # test generation protocol
    test_prot = UPP(
        U, repeats=NUM_TEST, return_type="labelled_collection", random_state=0
    )
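    # UPP (uniform-prevalence protocol) draws NUM_TEST samples from U, each of
    # qp.environ["SAMPLE_SIZE"] instances, with class prevalences sampled
    # (approximately) uniformly from the probability simplex, so that methods
    # are evaluated under a wide range of prior-probability shifts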

    # compute some stats of the dataset
    save_dataset_stats(f"dataset_stats/{dataset_name}.json", test_prot, L, V)

    # precompute the actual accuracy values
    true_accs = {}
    for acc_name, acc_fn in gen_acc_measure():
        true_accs[acc_name] = [true_acc(h, acc_fn, Ui) for Ui in test_prot()]

    # instances of ClassifierAccuracyPrediction are bound to a specific
    # evaluation measure, so they must be nested inside the accuracy loop
    for acc_name, acc_fn in gen_acc_measure():
        print(f"\tfor measure {acc_name}")
        for method_name, method in gen_CAP(h, acc_fn, with_oracle=ORACLE):
            report = TestReport(cls_name, acc_name, dataset_name, method_name)
            if os.path.exists(report.path(basedir)):
                print(f"\t\t{method_name}-{acc_name} exists, skipping")
                continue

            print(f"\t\t{method_name} computing...")
            method, t_train = fit_method(method, V)
            estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
            test_prevs = prevs_from_prot(test_prot)
            report.add_result(
                test_prevs=test_prevs,
                true_accs=true_accs[acc_name],
                estim_accs=estim_accs,
                t_train=t_train,
                t_test_ave=t_test_ave,
            ).save_json(basedir)

    # instances of CAPContingencyTable are instead generic: the contingency
    # tables are predicted once and every evaluation measure is then computed
    # from them, which speeds things up
    for method_name, method in gen_CAP_cont_table(h):
        if not any_missing(basedir, cls_name, dataset_name, method_name):
            print(
                f"\t\tmethod {method_name} has all results already computed. Skipping."
            )
            continue

        print(f"\t\tmethod {method_name} computing...")

        method, t_train = fit_method(method, V)
        estim_accs_dict, t_test_ave = predictionsCAPcont_table(
            method, test_prot, gen_acc_measure, ORACLE
        )
        for acc_name in estim_accs_dict.keys():
            report = TestReport(cls_name, acc_name, dataset_name, method_name)
            report.add_result(
                test_prevs=prevs_from_prot(test_prot),
                true_accs=true_accs[acc_name],
                estim_accs=estim_accs_dict[acc_name],
                t_train=t_train,
                t_test_ave=t_test_ave,
            ).save_json(basedir)

    print()

# generate diagonal plots
print("generating plots")
for (cls_name, _), (acc_name, _) in itertools.product(
    gen_classifiers(), gen_acc_measure()
):
    plot_diagonal(basedir, cls_name, acc_name)
    for dataset_name, _ in gen_datasets(only_names=True):
        plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)

print("generating tables")
# gen_tables(basedir, datasets=[d for d, _ in gen_datasets(only_names=True)])

quacc/experiments/util.py
@@ -0,0 +1,62 @@
from time import time

from quapy.data.base import LabelledCollection
from sklearn.base import BaseEstimator
from sklearn.metrics import confusion_matrix


def getpath(basedir, cls_name, acc_name, dataset_name, method_name):
    return f"results/{basedir}/{cls_name}/{acc_name}/{dataset_name}/{method_name}.json"
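
# For instance, with the driver's settings (PROBLEM="binary", classifier "LR",
# measure "vanilla_accuracy"), the DoC results for imdb are stored at
#   results/binary/LR/vanilla_accuracy/imdb/DoC.json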


def fit_method(method, V):
    # fit on the validation split V, timing the training
    tinit = time()
    method.fit(V)
    t_train = time() - tinit
    return method, t_train


def predictionsCAP(method, test_prot, oracle=False):
    tinit = time()
    if not oracle:
        estim_accs = [method.predict(Ui.X) for Ui in test_prot()]
    else:
        estim_accs = [
            method.predict(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
        ]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs, t_test_ave


def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
    estim_accs_dict = {}
    tinit = time()
    if not oracle:
        estim_tables = [method.predict_ct(Ui.X) for Ui in test_prot()]
    else:
        estim_tables = [
            method.predict_ct(Ui.X, oracle_prev=Ui.prevalence()) for Ui in test_prot()
        ]
    for acc_name, acc_fn in gen_acc_measure():
        estim_accs_dict[acc_name] = [acc_fn(cont_table) for cont_table in estim_tables]
    t_test_ave = (time() - tinit) / test_prot.total()
    return estim_accs_dict, t_test_ave


def prevs_from_prot(prot):
    return [Ui.prevalence() for Ui in prot()]


def true_acc(h: BaseEstimator, acc_fn: callable, U: LabelledCollection):
    # evaluate the classifier on the labelled sample U and compute the accuracy
    # measure directly from the resulting confusion matrix
    y_pred = h.predict(U.X)
    y_true = U.y
    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
    return acc_fn(conf_table)
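
# Sketch of the acc_fn contract assumed above (quacc.error provides the actual
# implementations): each measure maps a confusion matrix C, with C[i, j] the
# number of class-i instances predicted as class j, to a scalar; e.g. vanilla
# accuracy would be
#
#   import numpy as np
#   def vanilla_acc(C: np.ndarray) -> float:
#       return C.trace() / C.sum()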


def get_acc_name(acc_name):
    # map a display name to the key used by gen_acc_measure and in result paths
    return {
        "Vanilla Accuracy": "vanilla_accuracy",
        "Macro F1": "macro-F1",
    }[acc_name]