From 6afe3ddb314976c1a836a881c846d4f3e56a20de Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Thu, 1 Feb 2024 12:24:16 +0100 Subject: [PATCH] stats updated, naive fixes --- conf.yaml | 37 +++++++++++++++++++++++++++ copy_res.sh | 9 +++++++ quacc/evaluation/baseline.py | 12 ++++----- quacc/evaluation/method.py | 49 +++++++++++++++++++++++++++++++++++- quacc/evaluation/report.py | 5 ++++ quacc/evaluation/stats.py | 1 + 6 files changed, 106 insertions(+), 7 deletions(-) create mode 100755 copy_res.sh diff --git a/conf.yaml b/conf.yaml index e6e7f4b..4dbbccd 100644 --- a/conf.yaml +++ b/conf.yaml @@ -338,6 +338,43 @@ d_kde_rbf_conf: &d_kde_rbf_conf - DATASET_NAME: rcv1 DATASET_TARGET: CCAT +cc_lr_conf: &cc_lr_conf + global: + METRICS: + - acc + - f1 + OUT_DIR_NAME: output/cc_lr + DATASET_N_PREVS: 9 + COMP_ESTIMATORS: + # - bin_cc_lr + # - mul_cc_lr + # - m3w_cc_lr + # - bin_cc_lr_c + # - mul_cc_lr_c + # - m3w_cc_lr_c + # - bin_cc_lr_mc + # - mul_cc_lr_mc + # - m3w_cc_lr_mc + # - bin_cc_lr_ne + # - mul_cc_lr_ne + # - m3w_cc_lr_ne + # - bin_cc_lr_is + # - mul_cc_lr_is + # - m3w_cc_lr_is + # - bin_cc_lr_a + # - mul_cc_lr_a + # - m3w_cc_lr_a + - bin_cc_lr_gs + - mul_cc_lr_gs + - m3w_cc_lr_gs + N_JOBS: -2 + + confs: *main_confs + other_confs: + - DATASET_NAME: imdb + - DATASET_NAME: rcv1 + DATASET_TARGET: CCAT + baselines_conf: &baselines_conf global: METRICS: diff --git a/copy_res.sh b/copy_res.sh new file mode 100755 index 0000000..1eb0d4e --- /dev/null +++ b/copy_res.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/ +# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/ +scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/ + +# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ +# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ +scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ diff --git a/quacc/evaluation/baseline.py b/quacc/evaluation/baseline.py index 436dd47..7af7804 100644 --- a/quacc/evaluation/baseline.py +++ b/quacc/evaluation/baseline.py @@ -85,14 +85,14 @@ def naive( report = EvaluationReport(name="naive") for test in protocol(): test_preds = c_model_predict(test.X) - acc_score = metrics.accuracy_score(test.y, test_preds) - f1_score = metrics.f1_score(test.y, test_preds, average=f1_average) - meta_acc = abs(val_acc - acc_score) - meta_f1 = abs(val_f1 - f1_score) + test_acc = metrics.accuracy_score(test.y, test_preds) + test_f1 = metrics.f1_score(test.y, test_preds, average=f1_average) + meta_acc = abs(val_acc - test_acc) + meta_f1 = abs(val_f1 - test_f1) report.append_row( test.prevalence(), - acc_score=acc_score, - f1_score=f1_score, + acc_score=val_acc, + f1_score=val_f1, acc=meta_acc, f1=meta_f1, ) diff --git a/quacc/evaluation/method.py b/quacc/evaluation/method.py index 9942b65..de1025b 100644 --- a/quacc/evaluation/method.py +++ b/quacc/evaluation/method.py @@ -3,7 +3,7 @@ from typing import Callable, List, Union import numpy as np from matplotlib.pylab import rand -from quapy.method.aggregative import PACC, SLD, BaseQuantifier +from quapy.method.aggregative import CC, PACC, SLD, BaseQuantifier from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC, LinearSVC @@ -53,6 +53,17 @@ def _param_grid(method, X_fit: np.ndarray): "q__classifier__class_weight": [None, "balanced"], "confidence": [None, ["isoft"], ["max_conf", "entropy"]], } + case "cc_lr": + return { + "q__classifier__C": np.logspace(-3, 3, 7), + "q__classifier__class_weight": [None, "balanced"], + "confidence": [ + None, + ["isoft"], + ["max_conf", "entropy"], + ["max_conf", "entropy", "isoft"], + ], + } case "kde_lr": return { "q__classifier__C": np.logspace(-3, 3, 7), @@ -219,6 +230,10 @@ def __pacc_lr(): return PACC(LogisticRegression()) +def __cc_lr(): + return CC(LogisticRegression()) + + # fmt: off __sld_lr_set = [ @@ -448,6 +463,37 @@ __dense_kde_rbf_set = [ G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True), ] +__cc_lr_set = [ + # base cc + M("bin_cc_lr", __cc_lr(), "bin" ), + M("mul_cc_lr", __cc_lr(), "mul" ), + M("m3w_cc_lr", __cc_lr(), "mul", cf=True), + # max_conf + entropy cc + M("bin_cc_lr_c", __cc_lr(), "bin", conf=["max_conf", "entropy"] ), + M("mul_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"] ), + M("m3w_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True), + # max_conf cc + M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf", ), + M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", ), + M("m3w_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", cf=True), + # entropy cc + M("bin_cc_lr_ne", __cc_lr(), "bin", conf="entropy", ), + M("mul_cc_lr_ne", __cc_lr(), "mul", conf="entropy", ), + M("m3w_cc_lr_ne", __cc_lr(), "mul", conf="entropy", cf=True), + # inverse softmax cc + M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft", ), + M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft", ), + M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft", cf=True), + # cc all + M("bin_cc_lr_a", __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ), + M("mul_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ), + M("m3w_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True), + # gs cc + G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid" ), + G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid" ), + G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True), + E("cc_lr_gs"), +] # fmt: on @@ -458,6 +504,7 @@ __methods_set = ( + __kde_lr_set + __dense_kde_lr_set + __dense_kde_rbf_set + + __cc_lr_set + [E("QuAcc")] ) diff --git a/quacc/evaluation/report.py b/quacc/evaluation/report.py index bfe394e..20df414 100644 --- a/quacc/evaluation/report.py +++ b/quacc/evaluation/report.py @@ -140,6 +140,11 @@ class CompReport: "mul_kde_lr_gs", "m3w_kde_lr_gs", ], + "cc_lr_gs": [ + "bin_cc_lr_gs", + "mul_cc_lr_gs", + "m3w_cc_lr_gs", + ], "QuAcc": [ "bin_sld_lr_gs", "mul_sld_lr_gs", diff --git a/quacc/evaluation/stats.py b/quacc/evaluation/stats.py index 1bf12d6..3ef3365 100644 --- a/quacc/evaluation/stats.py +++ b/quacc/evaluation/stats.py @@ -25,6 +25,7 @@ def wilcoxon( ) -> pd.DataFrame: _data = r.data(metric, estimators) + _data = _data.dropna(axis=0, how="any") _wilcoxon = {} for est in _data.columns.unique(0): _wilcoxon[est] = [