docker merged

Lorenzo Volpi 2024-02-03 12:36:41 +01:00
commit 5d82419ce8
15 changed files with 318 additions and 31 deletions

View File

@@ -72,6 +72,10 @@ test_conf: &test_conf
 main:
   confs: &main_confs
     - DATASET_NAME: imdb
+<<<<<<< HEAD
+=======
+  other_confs:
+>>>>>>> docker
     - DATASET_NAME: rcv1
       DATASET_TARGET: CCAT
     - DATASET_NAME: rcv1
@@ -338,6 +342,43 @@ d_kde_rbf_conf: &d_kde_rbf_conf
     - DATASET_NAME: rcv1
       DATASET_TARGET: CCAT
 
+cc_lr_conf: &cc_lr_conf
+  global:
+    METRICS:
+      - acc
+      - f1
+    OUT_DIR_NAME: output/cc_lr
+    DATASET_N_PREVS: 9
+    COMP_ESTIMATORS:
+      # - bin_cc_lr
+      # - mul_cc_lr
+      # - m3w_cc_lr
+      # - bin_cc_lr_c
+      # - mul_cc_lr_c
+      # - m3w_cc_lr_c
+      # - bin_cc_lr_mc
+      # - mul_cc_lr_mc
+      # - m3w_cc_lr_mc
+      # - bin_cc_lr_ne
+      # - mul_cc_lr_ne
+      # - m3w_cc_lr_ne
+      # - bin_cc_lr_is
+      # - mul_cc_lr_is
+      # - m3w_cc_lr_is
+      # - bin_cc_lr_a
+      # - mul_cc_lr_a
+      # - m3w_cc_lr_a
+      - bin_cc_lr_gs
+      - mul_cc_lr_gs
+      - m3w_cc_lr_gs
+    N_JOBS: -2
+  confs: *main_confs
+  other_confs:
+    - DATASET_NAME: imdb
+    - DATASET_NAME: rcv1
+      DATASET_TARGET: CCAT
+
 baselines_conf: &baselines_conf
   global:
     METRICS:
@@ -349,9 +390,12 @@ baselines_conf: &baselines_conf
       - doc
       - atc_mc
       - naive
+<<<<<<< HEAD
       # - mandoline
       # - rca
       # - rca_star
+=======
+>>>>>>> docker
     N_JOBS: -2
   confs: *main_confs
@@ -389,22 +433,34 @@ timing_conf: &timing_conf
       - bin_kde_lr_a
       - mul_kde_lr_a
       - m3w_kde_lr_a
+      - doc
+      - atc_mc
+      - rca
+      - rca_star
+      - mandoline
+      - naive
+    N_JOBS: 1
+    PROTOCOL_REPEATS: 1
+  confs: *main_confs
+
+timing_gs_conf: &timing_gs_conf
+  global:
+    METRICS:
+      - acc
+      - f1
+    OUT_DIR_NAME: output/timing_gs
+    DATASET_N_PREVS: 1
+    COMP_ESTIMATORS:
       - bin_sld_lr_gs
       - mul_sld_lr_gs
       - m3w_sld_lr_gs
       - bin_kde_lr_gs
       - mul_kde_lr_gs
       - m3w_kde_lr_gs
-      - doc
-      - atc_mc
-      - rca
-      - rca_star
-      - mandoline
-    N_JOBS: 1
-    PROTOCOL_N_PREVS: 1,
-    PROTOCOL_REPEATS: 1,
-    SAMPLE_SIZE: 1000,
+    N_JOBS: -1
+    PROTOCOL_REPEATS: 1
   confs: *main_confs
 
-exec: *baselines_conf
+exec: *timing_gs_conf

copy_res.sh (Executable file, 9 changed lines)
View File

@@ -0,0 +1,9 @@
#!/bin/bash
# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/
# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/
scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/
# scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
# scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/
scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/

log (2 changed lines)
View File

@@ -3,6 +3,8 @@
 if [[ "${1}" == "r" ]]; then
     scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
     ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
+elif [[ "${1}" == "d" ]]; then
+    ssh andreaesuli@edge-nd1.isti.cnr.it tail -n 500 -f /home/andreaesuli/raid/lorenzo/quacc.log | bat -P --language=log
 else
     tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
 fi

View File

@@ -13,7 +13,7 @@ from dash import Dash, Input, Output, State, callback, ctx, dash_table, dcc, htm
 from dash.dash_table.Format import Align, Format, Scheme
 
 from quacc import plot
-from quacc.evaluation.estimators import CE
+from quacc.evaluation.estimators import CE, _renames
 from quacc.evaluation.report import CompReport, DatasetReport
 from quacc.evaluation.stats import wilcoxon
@@ -26,6 +26,23 @@ def _get_prev_str(prev: np.ndarray):
     return str(tuple(np.around(prev, decimals=2)))
 
 
+def rename_estimators(estimators, rev=False):
+    _rnm = _renames
+    if rev:
+        _rnm = {v: k for k, v in _renames.items()}
+
+    new_estimators = []
+    for c in estimators:
+        nc = c
+        for old, new in _rnm.items():
+            if c.startswith(old):
+                nc = new + c[len(old) :]
+        new_estimators.append(nc)
+
+    return new_estimators
+
+
 def get_datasets(root: str | Path) -> List[DatasetReport]:
     def load_dataset(dataset):
         dataset = Path(dataset)
@@ -153,7 +170,7 @@ def get_DataTable(df, mode):
     columns = {
         c: dict(
             id=c,
-            name=_index_name[mode] if c == "index" else c,
+            name=_index_name[mode] if c == "index" else rename_estimators([c])[0],
             type="numeric",
             format=columns_format,
         )
@@ -412,12 +429,13 @@ def update_estimators(href, dataset, metric, curr_estimators, root):
         old_estimators = json.loads(old_estimators)
     except JSONDecodeError:
         old_estimators = []
+    old_estimators = rename_estimators(old_estimators, rev=True)
     valid_estimators: np.ndarray = dr.data(metric=metric).columns.unique(0).to_numpy()
     new_estimators = valid_estimators[
         np.isin(valid_estimators, old_estimators)
     ].tolist()
     valid_estimators = CE.name.sort(valid_estimators.tolist())
-    return valid_estimators, new_estimators
+    return rename_estimators(valid_estimators), rename_estimators(new_estimators)
 
 
 @callback(
@@ -473,6 +491,7 @@ def update_content(dataset, metric, estimators, view, mode, root):
         quote_via=quote,
     )
     dr = get_dr(root, dataset)
+    estimators = rename_estimators(estimators, rev=True)
     match mode:
         case m if m.endswith("table"):
             df = get_table(

View File

@@ -126,7 +126,9 @@ class DatasetProvider:
     # provare min_df=5
     def __imdb(self, **kwargs):
-        return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test
+        return qp.datasets.fetch_reviews(
+            "imdb", data_home="./quapy_data", tfidf=True, min_df=3
+        ).train_test
 
     def __rcv1(self, target, **kwargs):
         n_train = 23149
@@ -135,7 +137,7 @@ class DatasetProvider:
         if target is None or target not in available_targets:
             raise ValueError(f"Invalid target {target}")
 
-        dataset = fetch_rcv1()
+        dataset = fetch_rcv1(data_home="./scikit_learn_data")
         target_index = np.where(dataset.target_names == target)[0]
         all_train_d = dataset.data[:n_train, :]
         test_d = dataset.data[n_train:, :]

View File

@@ -85,14 +85,14 @@ def naive(
     report = EvaluationReport(name="naive")
     for test in protocol():
         test_preds = c_model_predict(test.X)
-        acc_score = metrics.accuracy_score(test.y, test_preds)
-        f1_score = metrics.f1_score(test.y, test_preds, average=f1_average)
-        meta_acc = abs(val_acc - acc_score)
-        meta_f1 = abs(val_f1 - f1_score)
+        test_acc = metrics.accuracy_score(test.y, test_preds)
+        test_f1 = metrics.f1_score(test.y, test_preds, average=f1_average)
+        meta_acc = abs(val_acc - test_acc)
+        meta_f1 = abs(val_f1 - test_f1)
         report.append_row(
             test.prevalence(),
-            acc_score=acc_score,
-            f1_score=f1_score,
+            acc_score=val_acc,
+            f1_score=val_f1,
             acc=meta_acc,
             f1=meta_f1,
         )
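For context, a minimal sketch of what the naive baseline records after this change (illustration only, with made-up numbers): the validation score is reused as the prediction for every test sample, and the absolute gap to the true test score is logged as the error.

    # Illustration only, not part of the commit; the numbers are made up.
    val_acc, test_acc = 0.90, 0.84     # validation accuracy vs. actual test accuracy
    pred_acc = val_acc                 # what append_row now stores as acc_score
    meta_acc = abs(val_acc - test_acc) # ~0.06, stored as the "acc" error column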

View File

@@ -78,3 +78,33 @@ class CompEstimator:
 
 
 CE = CompEstimator()
+
+_renames = {
+    "bin_sld_lr": "(2x2)_SLD_LR",
+    "mul_sld_lr": "(1x4)_SLD_LR",
+    "m3w_sld_lr": "(1x3)_SLD_LR",
+    "d_bin_sld_lr": "d_(2x2)_SLD_LR",
+    "d_mul_sld_lr": "d_(1x4)_SLD_LR",
+    "d_m3w_sld_lr": "d_(1x3)_SLD_LR",
+    "d_bin_sld_rbf": "(2x2)_SLD_RBF",
+    "d_mul_sld_rbf": "(1x4)_SLD_RBF",
+    "d_m3w_sld_rbf": "(1x3)_SLD_RBF",
+    "sld_lr": "SLD_LR",
+    "bin_kde_lr": "(2x2)_KDEy_LR",
+    "mul_kde_lr": "(1x4)_KDEy_LR",
+    "m3w_kde_lr": "(1x3)_KDEy_LR",
+    "d_bin_kde_lr": "d_(2x2)_KDEy_LR",
+    "d_mul_kde_lr": "d_(1x4)_KDEy_LR",
+    "d_m3w_kde_lr": "d_(1x3)_KDEy_LR",
+    "bin_cc_lr": "(2x2)_CC_LR",
+    "mul_cc_lr": "(1x4)_CC_LR",
+    "m3w_cc_lr": "(1x3)_CC_LR",
+    "kde_lr": "KDEy_LR",
+    "cc_lr": "CC_LR",
+    "atc_mc": "ATC",
+    "doc": "DoC",
+    "mandoline": "Mandoline",
+    "rca": "RCA",
+    "rca_star": "RCA*",
+    "naive": "Naive",
+}
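For reference, a small self-contained sketch (not from the commit) of the prefix-rewrite idiom that both `rename_estimators` in the dashboard and `PlotlyPlot.rename_plots` below apply against this table; `demo_rename` and the two-entry excerpt are illustrative stand-ins.

    # Illustration only: prefix match against the table, keeping suffixes such as "_gs".
    renames_excerpt = {"bin_sld_lr": "(2x2)_SLD_LR", "atc_mc": "ATC"}

    def demo_rename(names, mapping):
        out = []
        for c in names:
            nc = c
            for old, new in mapping.items():
                if c.startswith(old):
                    nc = new + c[len(old):]
            out.append(nc)
        return out

    print(demo_rename(["bin_sld_lr_gs", "atc_mc"], renames_excerpt))
    # -> ['(2x2)_SLD_LR_gs', 'ATC']
    print(demo_rename(["(2x2)_SLD_LR_gs"], {v: k for k, v in renames_excerpt.items()}))
    # -> ['bin_sld_lr_gs']   (the rev=True direction used by the dashboard callbacks)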

View File

@@ -3,7 +3,7 @@ from typing import Callable, List, Union
 
 import numpy as np
 from matplotlib.pylab import rand
-from quapy.method.aggregative import PACC, SLD, BaseQuantifier
+from quapy.method.aggregative import CC, PACC, SLD, BaseQuantifier
 from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC, LinearSVC
@@ -53,6 +53,17 @@ def _param_grid(method, X_fit: np.ndarray):
                 "q__classifier__class_weight": [None, "balanced"],
                 "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
             }
+        case "cc_lr":
+            return {
+                "q__classifier__C": np.logspace(-3, 3, 7),
+                "q__classifier__class_weight": [None, "balanced"],
+                "confidence": [
+                    None,
+                    ["isoft"],
+                    ["max_conf", "entropy"],
+                    ["max_conf", "entropy", "isoft"],
+                ],
+            }
         case "kde_lr":
             return {
                 "q__classifier__C": np.logspace(-3, 3, 7),
@@ -219,6 +230,10 @@ def __pacc_lr():
     return PACC(LogisticRegression())
 
 
+def __cc_lr():
+    return CC(LogisticRegression())
+
+
 # fmt: off
 __sld_lr_set = [
@@ -380,9 +395,9 @@ __kde_lr_set = [
     M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
     M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
     # gs kde
-    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
-    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
-    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
+    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
+    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
+    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
     E("kde_lr_gs"),
 ]
@@ -448,6 +463,37 @@ __dense_kde_rbf_set = [
     G("d_m3w_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider", cf=True),
 ]
 
+__cc_lr_set = [
+    # base cc
+    M("bin_cc_lr", __cc_lr(), "bin" ),
+    M("mul_cc_lr", __cc_lr(), "mul" ),
+    M("m3w_cc_lr", __cc_lr(), "mul", cf=True),
+    # max_conf + entropy cc
+    M("bin_cc_lr_c", __cc_lr(), "bin", conf=["max_conf", "entropy"] ),
+    M("mul_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"] ),
+    M("m3w_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True),
+    # max_conf cc
+    M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf", ),
+    M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", ),
+    M("m3w_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", cf=True),
+    # entropy cc
+    M("bin_cc_lr_ne", __cc_lr(), "bin", conf="entropy", ),
+    M("mul_cc_lr_ne", __cc_lr(), "mul", conf="entropy", ),
+    M("m3w_cc_lr_ne", __cc_lr(), "mul", conf="entropy", cf=True),
+    # inverse softmax cc
+    M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft", ),
+    M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft", ),
+    M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft", cf=True),
+    # cc all
+    M("bin_cc_lr_a", __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
+    M("mul_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
+    M("m3w_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
+    # gs cc
+    G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid" ),
+    G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid" ),
+    G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True),
+    E("cc_lr_gs"),
+]
+
 # fmt: on
@@ -458,6 +504,8 @@ __methods_set = (
     + __kde_lr_set
     + __dense_kde_lr_set
     + __dense_kde_rbf_set
+    + __cc_lr_set
+    + [E("QuAcc")]
 )
 
 _methods = {m.name: m for m in __methods_set}

View File

@@ -140,6 +140,19 @@ class CompReport:
                 "mul_kde_lr_gs",
                 "m3w_kde_lr_gs",
             ],
+            "cc_lr_gs": [
+                "bin_cc_lr_gs",
+                "mul_cc_lr_gs",
+                "m3w_cc_lr_gs",
+            ],
+            "QuAcc": [
+                "bin_sld_lr_gs",
+                "mul_sld_lr_gs",
+                "m3w_sld_lr_gs",
+                "bin_kde_lr_gs",
+                "mul_kde_lr_gs",
+                "m3w_kde_lr_gs",
+            ],
         }
 
         for name, methods in _mapping.items():

View File

@@ -25,6 +25,7 @@ def wilcoxon(
 ) -> pd.DataFrame:
     _data = r.data(metric, estimators)
+    _data = _data.dropna(axis=0, how="any")
 
     _wilcoxon = {}
     for est in _data.columns.unique(0):
         _wilcoxon[est] = [

View File

@@ -39,8 +39,16 @@ def plot_delta(
     else:
         title = f"{_base_title}_{name}_avg_{avg}_{metric}"
 
-    x_label = f"{'test' if avg is None or avg == 'train' else 'train'} prevalence"
-    y_label = f"{metric} error"
+    if avg is None or avg == "train":
+        x_label = "Test Prevalence"
+    else:
+        x_label = "Train Prevalence"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_delta(
         base_prevs,
         columns,
@@ -81,8 +89,12 @@ def plot_diagonal(
     else:
         title = f"diagonal_{name}_{metric}"
 
-    x_label = f"true {metric}"
-    y_label = f"estim. {metric}"
+    if metric == "acc":
+        x_label = "True Vanilla Accuracy"
+        y_label = "Estimated Vanilla Accuracy"
+    else:
+        x_label = f"true {metric}"
+        y_label = f"estim. {metric}"
     fig = backend.plot_diagonal(
         reference,
         columns,
@@ -123,8 +135,13 @@ def plot_shift(
     else:
         title = f"shift_{name}_avg_{metric}"
 
-    x_label = "dataset shift"
-    y_label = f"{metric} error"
+    x_label = "Amount of Prior Probability Shift"
+    if metric == "acc":
+        y_label = "Prediction Error for Vanilla Accuracy"
+    elif metric == "f1":
+        y_label = "Prediction Error for F1"
+    else:
+        y_label = f"{metric} error"
     fig = backend.plot_shift(
         shift_prevs,
         columns,

View File

@@ -5,6 +5,7 @@ import numpy as np
 import plotly
 import plotly.graph_objects as go
 
+from quacc.evaluation.estimators import _renames
 from quacc.plot.base import BasePlot
@@ -50,6 +51,7 @@ class PlotlyPlot(BasePlot):
     def __init__(self, theme=None):
         self.theme = PlotlyPlot.__themes[theme]
+        self.rename = True
 
     def hex_to_rgb(self, hex: str, t: float | None = None):
         hex = hex.lstrip("#")
@@ -85,6 +87,24 @@ class PlotlyPlot(BasePlot):
     def save_fig(self, fig, base_path, title) -> Path:
         return None
 
+    def rename_plots(
+        self,
+        columns,
+    ):
+        if not self.rename:
+            return columns
+
+        new_columns = []
+        for c in columns:
+            nc = c
+            for old, new in _renames.items():
+                if c.startswith(old):
+                    nc = new + c[len(old) :]
+            new_columns.append(nc)
+
+        return np.array(new_columns)
+
     def plot_delta(
         self,
         base_prevs,
@@ -102,6 +122,7 @@ class PlotlyPlot(BasePlot):
         if isinstance(base_prevs[0], float):
             base_prevs = np.around([(1 - bp, bp) for bp in base_prevs], decimals=4)
         x = [str(tuple(bp)) for bp in base_prevs]
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             color = next(line_colors)
@@ -150,6 +171,7 @@
     ) -> go.Figure:
         fig = go.Figure()
         x = reference
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
 
         _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)]))

@@ -211,6 +233,7 @@
         fig = go.Figure()
         # x = shift_prevs[:, pos_class]
         x = shift_prevs
+        columns = self.rename_plots(columns)
         line_colors = self.get_colors(len(columns))
         for name, delta in zip(columns, data):
             col_idx = (columns == name).nonzero()[0][0]

rates.md (Normal file, 15 changed lines)
View File

@@ -0,0 +1,15 @@
# Additional covariates percentage
Rate of usage of additional covariates, recalibration and "balanced" class_weight
during grid search:
| method | add. cov. % | recalib % | rebalance % |
| --------------: | :----: | :-------: | :---------: |
| imdb_sld_lr | 81.49% | 77.78% | 59.26% |
| imdb_kde_lr | 71.43% | NA | 88.18% |
| rcv1_CCAT_sld_lr| 62.97% | 70.38% | 77.78% |
| rcv1_CCAT_kde_lr| 78.06% | NA | 84.82% |
| rcv1_GCAT_sld_lr| 76.93% | 61.54% | 65.39% |
| rcv1_GCAT_kde_lr| 71.36% | NA | 78.65% |
| rcv1_MCAT_sld_lr| 62.97% | 48.15% | 74.08% |
| rcv1_MCAT_kde_lr| 71.03% | NA | 68.70% |

run.py (4 changed lines)
View File

@@ -15,3 +15,7 @@ def run():
         run_local()
     elif args.remote:
         run_remote(detatch=args.detatch)
+
+
+if __name__ == "__main__":
+    run()

selected_gs.py (Normal file, 48 changed lines)
View File

@@ -0,0 +1,48 @@
import numpy as np

from quacc.evaluation.report import DatasetReport

datasets = [
    "imdb/imdb.pickle",
    "rcv1_CCAT/rcv1_CCAT.pickle",
    "rcv1_GCAT/rcv1_GCAT.pickle",
    "rcv1_MCAT/rcv1_MCAT.pickle",
]

gs = {
    "sld_lr_gs": [
        "bin_sld_lr_gs",
        "mul_sld_lr_gs",
        "m3w_sld_lr_gs",
    ],
    "kde_lr_gs": [
        "bin_kde_lr_gs",
        "mul_kde_lr_gs",
        "m3w_kde_lr_gs",
    ],
}

for dst in datasets:
    dr = DatasetReport.unpickle("output/main/" + dst)
    print(f"{dst}\n")
    for name, methods in gs.items():
        print(f"{name}")
        sel_methods = [
            {k: v for k, v in cr.fit_scores.items() if k in methods} for cr in dr.crs
        ]
        best_methods = [
            list(ms.keys())[np.argmin(list(ms.values()))] for ms in sel_methods
        ]
        m_cnt = []
        for m in methods:
            m_cnt.append((np.array(best_methods) == m).nonzero()[0].shape[0])
        m_cnt = np.array(m_cnt)
        m_freq = m_cnt / len(best_methods)
        for n in methods:
            print(n, end="\t")
        print()
        for v in m_freq:
            print(f"{v*100:.2f}", end="\t")
        print("\n\n")