docker merged

commit 5d82419ce8

conf.yaml | 76

@@ -72,6 +72,10 @@ test_conf: &test_conf
 main:
 confs: &main_confs
 - DATASET_NAME: imdb
+<<<<<<< HEAD
+=======
+other_confs:
+>>>>>>> docker
 - DATASET_NAME: rcv1
 DATASET_TARGET: CCAT
 - DATASET_NAME: rcv1
@@ -338,6 +342,43 @@ d_kde_rbf_conf: &d_kde_rbf_conf
 - DATASET_NAME: rcv1
 DATASET_TARGET: CCAT
 
+cc_lr_conf: &cc_lr_conf
+global:
+METRICS:
+- acc
+- f1
+OUT_DIR_NAME: output/cc_lr
+DATASET_N_PREVS: 9
+COMP_ESTIMATORS:
+# - bin_cc_lr
+# - mul_cc_lr
+# - m3w_cc_lr
+# - bin_cc_lr_c
+# - mul_cc_lr_c
+# - m3w_cc_lr_c
+# - bin_cc_lr_mc
+# - mul_cc_lr_mc
+# - m3w_cc_lr_mc
+# - bin_cc_lr_ne
+# - mul_cc_lr_ne
+# - m3w_cc_lr_ne
+# - bin_cc_lr_is
+# - mul_cc_lr_is
+# - m3w_cc_lr_is
+# - bin_cc_lr_a
+# - mul_cc_lr_a
+# - m3w_cc_lr_a
+- bin_cc_lr_gs
+- mul_cc_lr_gs
+- m3w_cc_lr_gs
+N_JOBS: -2
+
+confs: *main_confs
+other_confs:
+- DATASET_NAME: imdb
+- DATASET_NAME: rcv1
+DATASET_TARGET: CCAT
+
 baselines_conf: &baselines_conf
 global:
 METRICS:
@@ -349,9 +390,12 @@ baselines_conf: &baselines_conf
 - doc
 - atc_mc
 - naive
+<<<<<<< HEAD
 # - mandoline
 # - rca
 # - rca_star
+=======
+>>>>>>> docker
 N_JOBS: -2
 
 confs: *main_confs
@@ -389,22 +433,34 @@ timing_conf: &timing_conf
 - bin_kde_lr_a
 - mul_kde_lr_a
 - m3w_kde_lr_a
+- doc
+- atc_mc
+- rca
+- rca_star
+- mandoline
+- naive
+N_JOBS: 1
+PROTOCOL_REPEATS: 1
+
+confs: *main_confs
+
+timing_gs_conf: &timing_gs_conf
+global:
+METRICS:
+- acc
+- f1
+OUT_DIR_NAME: output/timing_gs
+DATASET_N_PREVS: 1
+COMP_ESTIMATORS:
 - bin_sld_lr_gs
 - mul_sld_lr_gs
 - m3w_sld_lr_gs
 - bin_kde_lr_gs
 - mul_kde_lr_gs
 - m3w_kde_lr_gs
-- doc
-- atc_mc
-- rca
-- rca_star
-- mandoline
-N_JOBS: 1
-PROTOCOL_N_PREVS: 1,
-PROTOCOL_REPEATS: 1,
-SAMPLE_SIZE: 1000,
+N_JOBS: -1
+PROTOCOL_REPEATS: 1
 
 confs: *main_confs
 
-exec: *baselines_conf
+exec: *timing_gs_conf
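
The conf.yaml hunks above add a cc_lr_conf experiment block and a timing_gs_conf block, both reusing the dataset list anchored as *main_confs. As a minimal sketch of how such a block can be read (an assumption: the snippet uses PyYAML directly and an abridged, illustrative config; the project's own config loader is not part of this diff):

# Hypothetical, abridged config for illustration only.
import yaml

raw = """
main_confs: &main_confs
  - DATASET_NAME: imdb
  - DATASET_NAME: rcv1
    DATASET_TARGET: CCAT

cc_lr_conf:
  global:
    METRICS: [acc, f1]
    OUT_DIR_NAME: output/cc_lr
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS: [bin_cc_lr_gs, mul_cc_lr_gs, m3w_cc_lr_gs]
    N_JOBS: -2
  confs: *main_confs
"""

conf = yaml.safe_load(raw)
cc_lr = conf["cc_lr_conf"]
print(cc_lr["global"]["COMP_ESTIMATORS"])   # ['bin_cc_lr_gs', 'mul_cc_lr_gs', 'm3w_cc_lr_gs']
print(cc_lr["confs"][1]["DATASET_TARGET"])  # 'CCAT' -- the *main_confs anchor is resolved on load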

(new file)

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
+# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
+scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
+
+# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+# scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
+scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/

log | 2

@@ -3,6 +3,8 @@
 if [[ "${1}" == "r" ]]; then
 scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
 ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
+elif [[ "${1}" == "d" ]]; then
+ssh andreaesuli@edge-nd1.isti.cnr.it tail -n 500 -f /home/andreaesuli/raid/lorenzo/quacc.log | bat -P --language=log
 else
 tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
 fi
@@ -13,7 +13,7 @@ from dash import Dash, Input, Output, State, callback, ctx, dash_table, dcc, htm
 from dash.dash_table.Format import Align, Format, Scheme
 
 from quacc import plot
-from quacc.evaluation.estimators import CE
+from quacc.evaluation.estimators import CE, _renames
 from quacc.evaluation.report import CompReport, DatasetReport
 from quacc.evaluation.stats import wilcoxon
 

@@ -26,6 +26,23 @@ def _get_prev_str(prev: np.ndarray):
     return str(tuple(np.around(prev, decimals=2)))
 
 
+def rename_estimators(estimators, rev=False):
+    _rnm = _renames
+    if rev:
+        _rnm = {v: k for k, v in _renames.items()}
+
+    new_estimators = []
+    for c in estimators:
+        nc = c
+        for old, new in _rnm.items():
+            if c.startswith(old):
+                nc = new + c[len(old) :]
+
+        new_estimators.append(nc)
+
+    return new_estimators
+
+
 def get_datasets(root: str | Path) -> List[DatasetReport]:
     def load_dataset(dataset):
         dataset = Path(dataset)
@@ -153,7 +170,7 @@ def get_DataTable(df, mode):
     columns = {
         c: dict(
             id=c,
-            name=_index_name[mode] if c == "index" else c,
+            name=_index_name[mode] if c == "index" else rename_estimators([c])[0],
             type="numeric",
             format=columns_format,
         )
@@ -412,12 +429,13 @@ def update_estimators(href, dataset, metric, curr_estimators, root):
         old_estimators = json.loads(old_estimators)
     except JSONDecodeError:
         old_estimators = []
+    old_estimators = rename_estimators(old_estimators, rev=True)
     valid_estimators: np.ndarray = dr.data(metric=metric).columns.unique(0).to_numpy()
     new_estimators = valid_estimators[
         np.isin(valid_estimators, old_estimators)
     ].tolist()
     valid_estimators = CE.name.sort(valid_estimators.tolist())
-    return valid_estimators, new_estimators
+    return rename_estimators(valid_estimators), rename_estimators(new_estimators)
 
 
 @callback(
@@ -473,6 +491,7 @@ def update_content(dataset, metric, estimators, view, mode, root):
         quote_via=quote,
     )
     dr = get_dr(root, dataset)
+    estimators = rename_estimators(estimators, rev=True)
     match mode:
         case m if m.endswith("table"):
             df = get_table(
@@ -126,7 +126,9 @@ class DatasetProvider:
 
     # try min_df=5
     def __imdb(self, **kwargs):
-        return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test
+        return qp.datasets.fetch_reviews(
+            "imdb", data_home="./quapy_data", tfidf=True, min_df=3
+        ).train_test
 
     def __rcv1(self, target, **kwargs):
         n_train = 23149

@@ -135,7 +137,7 @@ class DatasetProvider:
         if target is None or target not in available_targets:
             raise ValueError(f"Invalid target {target}")
 
-        dataset = fetch_rcv1()
+        dataset = fetch_rcv1(data_home="./scikit_learn_data")
         target_index = np.where(dataset.target_names == target)[0]
         all_train_d = dataset.data[:n_train, :]
         test_d = dataset.data[n_train:, :]
@@ -85,14 +85,14 @@ def naive(
     report = EvaluationReport(name="naive")
     for test in protocol():
         test_preds = c_model_predict(test.X)
-        acc_score = metrics.accuracy_score(test.y, test_preds)
-        f1_score = metrics.f1_score(test.y, test_preds, average=f1_average)
-        meta_acc = abs(val_acc - acc_score)
-        meta_f1 = abs(val_f1 - f1_score)
+        test_acc = metrics.accuracy_score(test.y, test_preds)
+        test_f1 = metrics.f1_score(test.y, test_preds, average=f1_average)
+        meta_acc = abs(val_acc - test_acc)
+        meta_f1 = abs(val_f1 - test_f1)
         report.append_row(
             test.prevalence(),
-            acc_score=acc_score,
-            f1_score=f1_score,
+            acc_score=val_acc,
+            f1_score=val_f1,
             acc=meta_acc,
             f1=meta_f1,
         )
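
The hunk above renames the per-sample scores (acc_score -> test_acc, f1_score -> test_f1) and makes the report store the validation score itself as acc_score/f1_score, while acc/f1 keep the absolute gap between validation and test performance. A standalone sketch of that logic (an assumption, not the project's code: it uses scikit-learn toy data and presumes val_acc is the accuracy measured on a held-out validation split):

from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=0)
X_tr, X_rest, y_tr, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
X_val, X_te, y_val, y_te = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
val_acc = metrics.accuracy_score(y_val, clf.predict(X_val))   # the naive "prediction"
test_acc = metrics.accuracy_score(y_te, clf.predict(X_te))    # the quantity being predicted
meta_acc = abs(val_acc - test_acc)                            # reported as `acc`
print(f"acc_score={val_acc:.3f}  test_acc={test_acc:.3f}  acc(error)={meta_acc:.3f}")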
@@ -78,3 +78,33 @@ class CompEstimator:
 
 
 CE = CompEstimator()
+
+
+_renames = {
+    "bin_sld_lr": "(2x2)_SLD_LR",
+    "mul_sld_lr": "(1x4)_SLD_LR",
+    "m3w_sld_lr": "(1x3)_SLD_LR",
+    "d_bin_sld_lr": "d_(2x2)_SLD_LR",
+    "d_mul_sld_lr": "d_(1x4)_SLD_LR",
+    "d_m3w_sld_lr": "d_(1x3)_SLD_LR",
+    "d_bin_sld_rbf": "(2x2)_SLD_RBF",
+    "d_mul_sld_rbf": "(1x4)_SLD_RBF",
+    "d_m3w_sld_rbf": "(1x3)_SLD_RBF",
+    "sld_lr": "SLD_LR",
+    "bin_kde_lr": "(2x2)_KDEy_LR",
+    "mul_kde_lr": "(1x4)_KDEy_LR",
+    "m3w_kde_lr": "(1x3)_KDEy_LR",
+    "d_bin_kde_lr": "d_(2x2)_KDEy_LR",
+    "d_mul_kde_lr": "d_(1x4)_KDEy_LR",
+    "d_m3w_kde_lr": "d_(1x3)_KDEy_LR",
+    "bin_cc_lr": "(2x2)_CC_LR",
+    "mul_cc_lr": "(1x4)_CC_LR",
+    "m3w_cc_lr": "(1x3)_CC_LR",
+    "kde_lr": "KDEy_LR",
+    "cc_lr": "CC_LR",
+    "atc_mc": "ATC",
+    "doc": "DoC",
+    "mandoline": "Mandoline",
+    "rca": "RCA",
+    "rca_star": "RCA*",
+    "naive": "Naive",
+}
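
The _renames table above, together with rename_estimators in the dashboard and rename_plots in the plotly backend, performs a prefix substitution that keeps any trailing suffix (for example "_gs") and can be inverted with rev=True. A small standalone illustration (only a subset of the table is reproduced; the helper below mirrors, but is not, the project's functions):

_renames = {
    "bin_sld_lr": "(2x2)_SLD_LR",
    "mul_kde_lr": "(1x4)_KDEy_LR",
    "atc_mc": "ATC",
}

def rename(names, mapping):
    out = []
    for name in names:
        new_name = name
        for old, new in mapping.items():
            if name.startswith(old):
                new_name = new + name[len(old):]   # keep whatever follows the prefix
        out.append(new_name)
    return out

print(rename(["bin_sld_lr_gs", "mul_kde_lr", "atc_mc"], _renames))
# ['(2x2)_SLD_LR_gs', '(1x4)_KDEy_LR', 'ATC']

inverse = {v: k for k, v in _renames.items()}      # what rev=True builds
print(rename(["(2x2)_SLD_LR_gs"], inverse))        # ['bin_sld_lr_gs']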
@@ -3,7 +3,7 @@ from typing import Callable, List, Union
 
 import numpy as np
 from matplotlib.pylab import rand
-from quapy.method.aggregative import PACC, SLD, BaseQuantifier
+from quapy.method.aggregative import CC, PACC, SLD, BaseQuantifier
 from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC, LinearSVC
@@ -53,6 +53,17 @@ def _param_grid(method, X_fit: np.ndarray):
                 "q__classifier__class_weight": [None, "balanced"],
                 "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
             }
+        case "cc_lr":
+            return {
+                "q__classifier__C": np.logspace(-3, 3, 7),
+                "q__classifier__class_weight": [None, "balanced"],
+                "confidence": [
+                    None,
+                    ["isoft"],
+                    ["max_conf", "entropy"],
+                    ["max_conf", "entropy", "isoft"],
+                ],
+            }
         case "kde_lr":
             return {
                 "q__classifier__C": np.logspace(-3, 3, 7),
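
Assuming the search="grid" variants enumerate the full Cartesian product of the cc_lr grid added above (the grid-search driver itself is not shown in this diff), the number of candidate configurations can be checked with a quick sketch:

from itertools import product

import numpy as np

grid = {
    "q__classifier__C": list(np.logspace(-3, 3, 7)),
    "q__classifier__class_weight": [None, "balanced"],
    "confidence": [None, ["isoft"], ["max_conf", "entropy"], ["max_conf", "entropy", "isoft"]],
}
print(len(list(product(*grid.values()))))  # 7 * 2 * 4 = 56 candidates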
@@ -219,6 +230,10 @@ def __pacc_lr():
     return PACC(LogisticRegression())
 
 
+def __cc_lr():
+    return CC(LogisticRegression())
+
+
 # fmt: off
 
 __sld_lr_set = [
@@ -380,9 +395,9 @@ __kde_lr_set = [
     M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
     M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
     # gs kde
-    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
-    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
-    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
+    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
+    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
+    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
     E("kde_lr_gs"),
 ]
 
@@ -448,6 +463,37 @@ __dense_kde_rbf_set = [
     G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True),
 ]
 
+__cc_lr_set = [
+    # base cc
+    M("bin_cc_lr", __cc_lr(), "bin" ),
+    M("mul_cc_lr", __cc_lr(), "mul" ),
+    M("m3w_cc_lr", __cc_lr(), "mul", cf=True),
+    # max_conf + entropy cc
+    M("bin_cc_lr_c", __cc_lr(), "bin", conf=["max_conf", "entropy"] ),
+    M("mul_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"] ),
+    M("m3w_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
+    # max_conf cc
+    M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf", ),
+    M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", ),
+    M("m3w_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", cf=True),
+    # entropy cc
+    M("bin_cc_lr_ne", __cc_lr(), "bin", conf="entropy", ),
+    M("mul_cc_lr_ne", __cc_lr(), "mul", conf="entropy", ),
+    M("m3w_cc_lr_ne", __cc_lr(), "mul", conf="entropy", cf=True),
+    # inverse softmax cc
+    M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft", ),
+    M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft", ),
+    M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft", cf=True),
+    # cc all
+    M("bin_cc_lr_a", __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
+    M("mul_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
+    M("m3w_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
+    # gs cc
+    G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid" ),
+    G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid" ),
+    G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True),
+    E("cc_lr_gs"),
+]
 
 # fmt: on
 
@@ -458,6 +504,8 @@ __methods_set = (
     + __kde_lr_set
     + __dense_kde_lr_set
     + __dense_kde_rbf_set
+    + __cc_lr_set
+    + [E("QuAcc")]
 )
 
 _methods = {m.name: m for m in __methods_set}
@@ -140,6 +140,19 @@ class CompReport:
             "mul_kde_lr_gs",
             "m3w_kde_lr_gs",
         ],
+        "cc_lr_gs": [
+            "bin_cc_lr_gs",
+            "mul_cc_lr_gs",
+            "m3w_cc_lr_gs",
+        ],
+        "QuAcc": [
+            "bin_sld_lr_gs",
+            "mul_sld_lr_gs",
+            "m3w_sld_lr_gs",
+            "bin_kde_lr_gs",
+            "mul_kde_lr_gs",
+            "m3w_kde_lr_gs",
+        ],
     }
 
     for name, methods in _mapping.items():
@@ -25,6 +25,7 @@ def wilcoxon(
 ) -> pd.DataFrame:
     _data = r.data(metric, estimators)
 
+    _data = _data.dropna(axis=0, how="any")
     _wilcoxon = {}
     for est in _data.columns.unique(0):
         _wilcoxon[est] = [
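
The added dropna keeps only rows for which every estimator has a score, so the paired test compares complete pairs. A minimal standalone illustration (made-up numbers, plain scipy; not the project's wilcoxon wrapper):

import numpy as np
import pandas as pd
from scipy.stats import wilcoxon as sp_wilcoxon

_data = pd.DataFrame({
    "est_a": [0.10, 0.12, np.nan, 0.09, 0.15],
    "est_b": [0.11, 0.10, 0.08, 0.14, 0.18],
})
_data = _data.dropna(axis=0, how="any")   # drop the row est_a never scored
print(sp_wilcoxon(_data["est_a"], _data["est_b"]))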
@@ -39,8 +39,16 @@ def plot_delta(
     else:
         title = f"{_base_title}_{name}_avg_{avg}_{metric}"
 
-    x_label = f"{'test' if avg is None or avg == 'train' else 'train'} prevalence"
-    y_label = f"{metric} error"
+    if avg is None or avg == "train":
+        x_label = "Test Prevalence"
+    else:
+        x_label = "Train Prevalence"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_delta(
         base_prevs,
         columns,
@@ -81,8 +89,12 @@ def plot_diagonal(
     else:
         title = f"diagonal_{name}_{metric}"
 
-    x_label = f"true {metric}"
-    y_label = f"estim. {metric}"
+    if metric == "acc":
+        x_label = "True Vanilla Accuracy"
+        y_label = "Estimated Vanilla Accuracy"
+    else:
+        x_label = f"true {metric}"
+        y_label = f"estim. {metric}"
     fig = backend.plot_diagonal(
         reference,
         columns,
@@ -123,8 +135,13 @@ def plot_shift(
     else:
         title = f"shift_{name}_avg_{metric}"
 
-    x_label = "dataset shift"
-    y_label = f"{metric} error"
+    x_label = "Amount of Prior Probability Shift"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_shift(
         shift_prevs,
         columns,
@@ -5,6 +5,7 @@ import numpy as np
 import plotly
 import plotly.graph_objects as go
 
+from quacc.evaluation.estimators import _renames
 from quacc.plot.base import BasePlot
 
 

@@ -50,6 +51,7 @@ class PlotlyPlot(BasePlot):
 
     def __init__(self, theme=None):
         self.theme = PlotlyPlot.__themes[theme]
+        self.rename = True
 
     def hex_to_rgb(self, hex: str, t: float | None = None):
         hex = hex.lstrip("#")
@@ -85,6 +87,24 @@ class PlotlyPlot(BasePlot):
     def save_fig(self, fig, base_path, title) -> Path:
         return None
 
+    def rename_plots(
+        self,
+        columns,
+    ):
+        if not self.rename:
+            return columns
+
+        new_columns = []
+        for c in columns:
+            nc = c
+            for old, new in _renames.items():
+                if c.startswith(old):
+                    nc = new + c[len(old) :]
+
+            new_columns.append(nc)
+
+        return np.array(new_columns)
+
     def plot_delta(
         self,
         base_prevs,
@@ -102,6 +122,7 @@ class PlotlyPlot(BasePlot):
         if isinstance(base_prevs[0], float):
             base_prevs = np.around([(1 - bp, bp) for bp in base_prevs], decimals=4)
         x = [str(tuple(bp)) for bp in base_prevs]
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             color = next(line_colors)

@@ -150,6 +171,7 @@ class PlotlyPlot(BasePlot):
     ) -> go.Figure:
         fig = go.Figure()
         x = reference
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
 
         _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)]))

@@ -211,6 +233,7 @@ class PlotlyPlot(BasePlot):
         fig = go.Figure()
         # x = shift_prevs[:, pos_class]
         x = shift_prevs
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             col_idx = (columns == name).nonzero()[0][0]
(new file)

@@ -0,0 +1,15 @@
+# Additional covariates percentage
+
+Rate of usage of additional covariates, recalibration and "balanced" class_weight
+during grid search:
+
+| method | av % | recalib % | rebalance % |
+| --------------: | :----: | :-------: | :---------: |
+| imdb_sld_lr | 81.49% | 77.78% | 59.26% |
+| imdb_kde_lr | 71.43% | NA | 88.18% |
+| rcv1_CCAT_sld_lr| 62.97% | 70.38% | 77.78% |
+| rcv1_CCAT_kde_lr| 78.06% | NA | 84.82% |
+| rcv1_GCAT_sld_lr| 76.93% | 61.54% | 65.39% |
+| rcv1_GCAT_kde_lr| 71.36% | NA | 78.65% |
+| rcv1_MCAT_sld_lr| 62.97% | 48.15% | 74.08% |
+| rcv1_MCAT_kde_lr| 71.03% | NA | 68.70% |

run.py | 4
@@ -15,3 +15,7 @@ def run():
         run_local()
     elif args.remote:
         run_remote(detatch=args.detatch)
+
+
+if __name__ == "__main__":
+    run()
(new file)

@@ -0,0 +1,48 @@
+import numpy as np
+
+from quacc.evaluation.report import DatasetReport
+
+datasets = [
+    "imdb/imdb.pickle",
+    "rcv1_CCAT/rcv1_CCAT.pickle",
+    "rcv1_GCAT/rcv1_GCAT.pickle",
+    "rcv1_MCAT/rcv1_MCAT.pickle",
+]
+
+gs = {
+    "sld_lr_gs": [
+        "bin_sld_lr_gs",
+        "mul_sld_lr_gs",
+        "m3w_sld_lr_gs",
+    ],
+    "kde_lr_gs": [
+        "bin_kde_lr_gs",
+        "mul_kde_lr_gs",
+        "m3w_kde_lr_gs",
+    ],
+}
+
+for dst in datasets:
+    dr = DatasetReport.unpickle("output/main/" + dst)
+    print(f"{dst}\n")
+    for name, methods in gs.items():
+        print(f"{name}")
+        sel_methods = [
+            {k: v for k, v in cr.fit_scores.items() if k in methods} for cr in dr.crs
+        ]
+
+        best_methods = [
+            list(ms.keys())[np.argmin(list(ms.values()))] for ms in sel_methods
+        ]
+        m_cnt = []
+        for m in methods:
+            m_cnt.append((np.array(best_methods) == m).nonzero()[0].shape[0])
+        m_cnt = np.array(m_cnt)
+        m_freq = m_cnt / len(best_methods)
+
+        for n in methods:
+            print(n, end="\t")
+        print()
+        for v in m_freq:
+            print(f"{v*100:.2f}", end="\t")
+        print("\n\n")