From a800dde0c3325e2194042a6450c24bd2a7439b95 Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Sat, 3 Feb 2024 13:20:28 +0100 Subject: [PATCH 1/3] bug fixed in qdash/app --- qcdash/app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qcdash/app.py b/qcdash/app.py index 8fae568..cf4f21f 100644 --- a/qcdash/app.py +++ b/qcdash/app.py @@ -27,6 +27,9 @@ def _get_prev_str(prev: np.ndarray): def rename_estimators(estimators, rev=False): + if estimators is None: + return None + _rnm = _renames if rev: _rnm = {v: k for k, v in _renames.items()} From 527598bbf5f6ac87613fc8f7296ef8fae0538af7 Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Mon, 12 Feb 2024 15:33:50 +0100 Subject: [PATCH 2/3] tesi updated --- conf.yaml | 10 +- copy_res.sh | 16 +- quacc/evaluation/baseline.py | 430 ++++++++++++++++----------------- quacc/evaluation/estimators.py | 19 +- quacc/evaluation/method.py | 94 +++---- quacc/evaluation/report.py | 6 +- quacc/plot/plotly.py | 47 +++- test_imdb_max_shift.py | 15 ++ 8 files changed, 346 insertions(+), 291 deletions(-) create mode 100644 test_imdb_max_shift.py diff --git a/conf.yaml b/conf.yaml index f21d1aa..42ac4dc 100644 --- a/conf.yaml +++ b/conf.yaml @@ -71,14 +71,14 @@ test_conf: &test_conf main: confs: &main_confs - - DATASET_NAME: imdb - other_confs: - - DATASET_NAME: rcv1 - DATASET_TARGET: CCAT - DATASET_NAME: rcv1 DATASET_TARGET: GCAT - DATASET_NAME: rcv1 DATASET_TARGET: MCAT + other_confs: + - DATASET_NAME: imdb + - DATASET_NAME: rcv1 + DATASET_TARGET: CCAT sld_lr_conf: &sld_lr_conf @@ -453,4 +453,4 @@ timing_gs_conf: &timing_gs_conf confs: *main_confs -exec: *timing_gs_conf +exec: *d_kde_lr_conf diff --git a/copy_res.sh b/copy_res.sh index 8418a84..93d9dc5 100755 --- a/copy_res.sh +++ b/copy_res.sh @@ -1,9 +1,21 @@ #!/bin/bash +DIRS=() +# DIRS+=("kde_lr_gs") +# DIRS+=("cc_lr") +# DIRS+=("baselines") +# DIRS+=("d_sld_rbf") +DIRS+=("d_sld_lr") + +for dir in ${DIRS[@]}; do + scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/${dir} ./output/ + scp -r ./output/${dir} volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ +done + # scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/kde_lr_gs ./output/ # scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/cc_lr ./output/ -scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/ +# scp -r andreaesuli@edge-nd1.isti.cnr.it:/home/andreaesuli/raid/lorenzo/output/baselines ./output/ # scp -r ./output/kde_lr_gs volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ # scp -r ./output/cc_lr volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ -scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ +# scp -r ./output/baselines volpi@ilona.isti.cnr.it:/home/volpi/tesi/output/ diff --git a/quacc/evaluation/baseline.py b/quacc/evaluation/baseline.py index 7af7804..eb9aee5 100644 --- a/quacc/evaluation/baseline.py +++ b/quacc/evaluation/baseline.py @@ -68,6 +68,27 @@ def kfcv( return report +@baseline +def ref( + c_model: BaseEstimator, + validation: LabelledCollection, + protocol: AbstractStochasticSeededProtocol, +): + c_model_predict = getattr(c_model, "predict") + f1_average = "binary" if validation.n_classes == 2 else "macro" + + report = EvaluationReport(name="ref") + for test in protocol(): + test_preds = c_model_predict(test.X) + report.append_row( + test.prevalence(), + acc_score=metrics.accuracy_score(test.y, test_preds), + f1_score=metrics.f1_score(test.y, test_preds, average=f1_average), + ) + + return report + + @baseline def naive( c_model: BaseEstimator, @@ -101,22 +122,205 @@ def naive( @baseline -def ref( +def mandoline( c_model: BaseEstimator, validation: LabelledCollection, protocol: AbstractStochasticSeededProtocol, -): - c_model_predict = getattr(c_model, "predict") - f1_average = "binary" if validation.n_classes == 2 else "macro" + predict_method="predict_proba", +) -> EvaluationReport: + c_model_predict = getattr(c_model, predict_method) - report = EvaluationReport(name="ref") + val_probs = c_model_predict(validation.X) + val_preds = np.argmax(val_probs, axis=1) + D_val = mandolib.get_slices(val_probs) + emprical_mat_list_val = (1.0 * (val_preds == validation.y))[:, np.newaxis] + + report = EvaluationReport(name="mandoline") for test in protocol(): - test_preds = c_model_predict(test.X) - report.append_row( - test.prevalence(), - acc_score=metrics.accuracy_score(test.y, test_preds), - f1_score=metrics.f1_score(test.y, test_preds, average=f1_average), - ) + test_probs = c_model_predict(test.X) + test_pred = np.argmax(test_probs, axis=1) + D_test = mandolib.get_slices(test_probs) + wp = mandolib.estimate_performance(D_val, D_test, None, emprical_mat_list_val) + score = wp.all_estimates[0].weighted[0] + meta_score = abs(score - metrics.accuracy_score(test.y, test_pred)) + report.append_row(test.prevalence(), acc=meta_score, acc_score=score) + + return report + + +@baseline +def rca( + c_model: BaseEstimator, + validation: LabelledCollection, + protocol: AbstractStochasticSeededProtocol, + predict_method="predict", +): + """elsahar19""" + c_model_predict = getattr(c_model, predict_method) + f1_average = "binary" if validation.n_classes == 2 else "macro" + val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED) + val1_pred1 = c_model_predict(val1.X) + + val2_protocol = APP( + val2, + n_prevalences=21, + repeats=100, + return_type="labelled_collection", + ) + val2_prot_preds = [] + val2_rca = [] + val2_prot_preds = [] + val2_prot_y = [] + for v2 in val2_protocol(): + _preds = c_model_predict(v2.X) + try: + c_model2 = clone_fit(c_model, v2.X, _preds) + c_model2_predict = getattr(c_model2, predict_method) + val1_pred2 = c_model2_predict(val1.X) + rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y) + val2_rca.append(rca_score) + val2_prot_preds.append(_preds) + val2_prot_y.append(v2.y) + except ValueError: + pass + + val_targets_acc = np.array( + [ + metrics.accuracy_score(v2_y, v2_preds) + for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) + ] + ) + reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc) + val_targets_f1 = np.array( + [ + metrics.f1_score(v2_y, v2_preds, average=f1_average) + for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) + ] + ) + reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1) + + report = EvaluationReport(name="rca") + for test in protocol(): + try: + test_preds = c_model_predict(test.X) + c_model2 = clone_fit(c_model, test.X, test_preds) + c_model2_predict = getattr(c_model2, predict_method) + val1_pred2 = c_model2_predict(val1.X) + rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y) + acc_score = reg_acc.predict(np.array([[rca_score]]))[0] + f1_score = reg_f1.predict(np.array([[rca_score]]))[0] + meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds)) + meta_f1 = abs( + f1_score - metrics.f1_score(test.y, test_preds, average=f1_average) + ) + report.append_row( + test.prevalence(), + acc=meta_acc, + acc_score=acc_score, + f1=meta_f1, + f1_score=f1_score, + ) + except ValueError: + report.append_row( + test.prevalence(), + acc=np.nan, + acc_score=np.nan, + f1=np.nan, + f1_score=np.nan, + ) + + return report + + +@baseline +def rca_star( + c_model: BaseEstimator, + validation: LabelledCollection, + protocol: AbstractStochasticSeededProtocol, + predict_method="predict", +): + """elsahar19""" + c_model_predict = getattr(c_model, predict_method) + f1_average = "binary" if validation.n_classes == 2 else "macro" + validation1, val2 = validation.split_stratified( + train_prop=0.5, random_state=env._R_SEED + ) + val11, val12 = validation1.split_stratified( + train_prop=0.5, random_state=env._R_SEED + ) + + val11_pred = c_model_predict(val11.X) + c_model1 = clone_fit(c_model, val11.X, val11_pred) + c_model1_predict = getattr(c_model1, predict_method) + val12_pred1 = c_model1_predict(val12.X) + + val2_protocol = APP( + val2, + n_prevalences=21, + repeats=100, + return_type="labelled_collection", + ) + val2_prot_preds = [] + val2_rca = [] + val2_prot_preds = [] + val2_prot_y = [] + for v2 in val2_protocol(): + _preds = c_model_predict(v2.X) + try: + c_model2 = clone_fit(c_model, v2.X, _preds) + c_model2_predict = getattr(c_model2, predict_method) + val12_pred2 = c_model2_predict(val12.X) + rca_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y) + val2_rca.append(rca_score) + val2_prot_preds.append(_preds) + val2_prot_y.append(v2.y) + except ValueError: + pass + + val_targets_acc = np.array( + [ + metrics.accuracy_score(v2_y, v2_preds) + for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) + ] + ) + reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc) + val_targets_f1 = np.array( + [ + metrics.f1_score(v2_y, v2_preds, average=f1_average) + for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) + ] + ) + reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1) + + report = EvaluationReport(name="rca_star") + for test in protocol(): + try: + test_pred = c_model_predict(test.X) + c_model2 = clone_fit(c_model, test.X, test_pred) + c_model2_predict = getattr(c_model2, predict_method) + val12_pred2 = c_model2_predict(val12.X) + rca_star_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y) + acc_score = reg_acc.predict(np.array([[rca_star_score]]))[0] + f1_score = reg_f1.predict(np.array([[rca_score]]))[0] + meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_pred)) + meta_f1 = abs( + f1_score - metrics.f1_score(test.y, test_pred, average=f1_average) + ) + report.append_row( + test.prevalence(), + acc=meta_acc, + acc_score=acc_score, + f1=meta_f1, + f1_score=f1_score, + ) + except ValueError: + report.append_row( + test.prevalence(), + acc=np.nan, + acc_score=np.nan, + f1=np.nan, + f1_score=np.nan, + ) return report @@ -311,183 +515,6 @@ def doc_feat( return report -@baseline -def rca( - c_model: BaseEstimator, - validation: LabelledCollection, - protocol: AbstractStochasticSeededProtocol, - predict_method="predict", -): - """elsahar19""" - c_model_predict = getattr(c_model, predict_method) - f1_average = "binary" if validation.n_classes == 2 else "macro" - val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED) - val1_pred1 = c_model_predict(val1.X) - - val2_protocol = APP( - val2, - n_prevalences=21, - repeats=100, - return_type="labelled_collection", - ) - val2_prot_preds = [] - val2_rca = [] - val2_prot_preds = [] - val2_prot_y = [] - for v2 in val2_protocol(): - _preds = c_model_predict(v2.X) - try: - c_model2 = clone_fit(c_model, v2.X, _preds) - c_model2_predict = getattr(c_model2, predict_method) - val1_pred2 = c_model2_predict(val1.X) - rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y) - val2_rca.append(rca_score) - val2_prot_preds.append(_preds) - val2_prot_y.append(v2.y) - except ValueError: - pass - - val_targets_acc = np.array( - [ - metrics.accuracy_score(v2_y, v2_preds) - for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) - ] - ) - reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc) - val_targets_f1 = np.array( - [ - metrics.f1_score(v2_y, v2_preds, average=f1_average) - for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) - ] - ) - reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1) - - report = EvaluationReport(name="rca") - for test in protocol(): - try: - test_preds = c_model_predict(test.X) - c_model2 = clone_fit(c_model, test.X, test_preds) - c_model2_predict = getattr(c_model2, predict_method) - val1_pred2 = c_model2_predict(val1.X) - rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y) - acc_score = reg_acc.predict(np.array([[rca_score]]))[0] - f1_score = reg_f1.predict(np.array([[rca_score]]))[0] - meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds)) - meta_f1 = abs( - f1_score - metrics.f1_score(test.y, test_preds, average=f1_average) - ) - report.append_row( - test.prevalence(), - acc=meta_acc, - acc_score=acc_score, - f1=meta_f1, - f1_score=f1_score, - ) - except ValueError: - report.append_row( - test.prevalence(), - acc=np.nan, - acc_score=np.nan, - f1=np.nan, - f1_score=np.nan, - ) - - return report - - -@baseline -def rca_star( - c_model: BaseEstimator, - validation: LabelledCollection, - protocol: AbstractStochasticSeededProtocol, - predict_method="predict", -): - """elsahar19""" - c_model_predict = getattr(c_model, predict_method) - f1_average = "binary" if validation.n_classes == 2 else "macro" - validation1, val2 = validation.split_stratified( - train_prop=0.5, random_state=env._R_SEED - ) - val11, val12 = validation1.split_stratified( - train_prop=0.5, random_state=env._R_SEED - ) - - val11_pred = c_model_predict(val11.X) - c_model1 = clone_fit(c_model, val11.X, val11_pred) - c_model1_predict = getattr(c_model1, predict_method) - val12_pred1 = c_model1_predict(val12.X) - - val2_protocol = APP( - val2, - n_prevalences=21, - repeats=100, - return_type="labelled_collection", - ) - val2_prot_preds = [] - val2_rca = [] - val2_prot_preds = [] - val2_prot_y = [] - for v2 in val2_protocol(): - _preds = c_model_predict(v2.X) - try: - c_model2 = clone_fit(c_model, v2.X, _preds) - c_model2_predict = getattr(c_model2, predict_method) - val12_pred2 = c_model2_predict(val12.X) - rca_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y) - val2_rca.append(rca_score) - val2_prot_preds.append(_preds) - val2_prot_y.append(v2.y) - except ValueError: - pass - - val_targets_acc = np.array( - [ - metrics.accuracy_score(v2_y, v2_preds) - for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) - ] - ) - reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc) - val_targets_f1 = np.array( - [ - metrics.f1_score(v2_y, v2_preds, average=f1_average) - for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds) - ] - ) - reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1) - - report = EvaluationReport(name="rca_star") - for test in protocol(): - try: - test_pred = c_model_predict(test.X) - c_model2 = clone_fit(c_model, test.X, test_pred) - c_model2_predict = getattr(c_model2, predict_method) - val12_pred2 = c_model2_predict(val12.X) - rca_star_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y) - acc_score = reg_acc.predict(np.array([[rca_star_score]]))[0] - f1_score = reg_f1.predict(np.array([[rca_score]]))[0] - meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_pred)) - meta_f1 = abs( - f1_score - metrics.f1_score(test.y, test_pred, average=f1_average) - ) - report.append_row( - test.prevalence(), - acc=meta_acc, - acc_score=acc_score, - f1=meta_f1, - f1_score=f1_score, - ) - except ValueError: - report.append_row( - test.prevalence(), - acc=np.nan, - acc_score=np.nan, - f1=np.nan, - f1_score=np.nan, - ) - - return report - - @baseline def gde( c_model: BaseEstimator, @@ -514,33 +541,6 @@ def gde( return report -@baseline -def mandoline( - c_model: BaseEstimator, - validation: LabelledCollection, - protocol: AbstractStochasticSeededProtocol, - predict_method="predict_proba", -) -> EvaluationReport: - c_model_predict = getattr(c_model, predict_method) - - val_probs = c_model_predict(validation.X) - val_preds = np.argmax(val_probs, axis=1) - D_val = mandolib.get_slices(val_probs) - emprical_mat_list_val = (1.0 * (val_preds == validation.y))[:, np.newaxis] - - report = EvaluationReport(name="mandoline") - for test in protocol(): - test_probs = c_model_predict(test.X) - test_pred = np.argmax(test_probs, axis=1) - D_test = mandolib.get_slices(test_probs) - wp = mandolib.estimate_performance(D_val, D_test, None, emprical_mat_list_val) - score = wp.all_estimates[0].weighted[0] - meta_score = abs(score - metrics.accuracy_score(test.y, test_pred)) - report.append_row(test.prevalence(), acc=meta_score, acc_score=score) - - return report - - @baseline def logreg( c_model: BaseEstimator, diff --git a/quacc/evaluation/estimators.py b/quacc/evaluation/estimators.py index 6f5ff4d..d6dd81e 100644 --- a/quacc/evaluation/estimators.py +++ b/quacc/evaluation/estimators.py @@ -40,20 +40,19 @@ class CompEstimatorName_: class CompEstimator: def __get(cls, e: str | List[str], get_ref=True): - _dict = alt._alts | method._methods | baseline._baselines + _dict = alt._alts | baseline._baselines | method._methods - match e: - case "__all": - e = list(_dict.keys()) - case "__baselines": - e = list(baseline._baselines.keys()) + if isinstance(e, str) and e == "__all": + e = list(_dict.keys()) + if isinstance(e, str) and e == "__baselines": + e = list(baseline._baselines.keys()) if isinstance(e, str): try: return {e: _dict[e]} except KeyError: raise KeyError(f"Invalid estimator: estimator {e} does not exist") - elif isinstance(e, list): + elif isinstance(e, list) or isinstance(e, np.ndarray): _subtr = np.setdiff1d(e, list(_dict.keys())) if len(_subtr) > 0: raise KeyError( @@ -89,7 +88,7 @@ _renames = { "d_bin_sld_rbf": "(2x2)_SLD_RBF", "d_mul_sld_rbf": "(1x4)_SLD_RBF", "d_m3w_sld_rbf": "(1x3)_SLD_RBF", - "sld_lr": "SLD_LR", + "sld_lr_gs": "MS_SLD_LR", "bin_kde_lr": "(2x2)_KDEy_LR", "mul_kde_lr": "(1x4)_KDEy_LR", "m3w_kde_lr": "(1x3)_KDEy_LR", @@ -99,8 +98,8 @@ _renames = { "bin_cc_lr": "(2x2)_CC_LR", "mul_cc_lr": "(1x4)_CC_LR", "m3w_cc_lr": "(1x3)_CC_LR", - "kde_lr": "KDEy_LR", - "cc_lr": "CC_LR", + "kde_lr_gs": "MS_KDEy_LR", + "cc_lr_gs": "MS_CC_LR", "atc_mc": "ATC", "doc": "DoC", "mandoline": "Mandoline", diff --git a/quacc/evaluation/method.py b/quacc/evaluation/method.py index de1025b..79326be 100644 --- a/quacc/evaluation/method.py +++ b/quacc/evaluation/method.py @@ -242,12 +242,6 @@ __sld_lr_set = [ M("mul_sld_lr", __sld_lr(), "mul" ), M("m3w_sld_lr", __sld_lr(), "mul", cf=True), M("mgf_sld_lr", __sld_lr(), "mul", gf=True), - # max_conf + entropy sld - M("bin_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"] ), - M("bgf_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"], gf=True), - M("mul_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"] ), - M("m3w_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], cf=True), - M("mgf_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], gf=True), # max_conf sld M("bin_sld_lr_mc", __sld_lr(), "bin", conf="max_conf", ), M("bgf_sld_lr_mc", __sld_lr(), "bin", conf="max_conf", gf=True), @@ -266,6 +260,12 @@ __sld_lr_set = [ M("mul_sld_lr_is", __sld_lr(), "mul", conf="isoft", ), M("m3w_sld_lr_is", __sld_lr(), "mul", conf="isoft", cf=True), M("mgf_sld_lr_is", __sld_lr(), "mul", conf="isoft", gf=True), + # max_conf + entropy sld + M("bin_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"] ), + M("bgf_sld_lr_c", __sld_lr(), "bin", conf=["max_conf", "entropy"], gf=True), + M("mul_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"] ), + M("m3w_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], cf=True), + M("mgf_sld_lr_c", __sld_lr(), "mul", conf=["max_conf", "entropy"], gf=True), # sld all M("bin_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ), M("bgf_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], gf=True), @@ -278,7 +278,6 @@ __sld_lr_set = [ G("mul_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr" ), G("m3w_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", cf=True), G("mgf_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", gf=True), - E("sld_lr_gs"), ] __dense_sld_lr_set = [ @@ -287,12 +286,6 @@ __dense_sld_lr_set = [ M("d_mul_sld_lr", __sld_lr(), "mul", d=True, ), M("d_m3w_sld_lr", __sld_lr(), "mul", d=True, cf=True), M("d_mgf_sld_lr", __sld_lr(), "mul", d=True, gf=True), - # max_conf + entropy sld - M("d_bin_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"] ), - M("d_bgf_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"], gf=True), - M("d_mul_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"] ), - M("d_m3w_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), - M("d_mgf_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], gf=True), # max_conf sld M("d_bin_sld_lr_mc", __sld_lr(), "bin", d=True, conf="max_conf", ), M("d_bgf_sld_lr_mc", __sld_lr(), "bin", d=True, conf="max_conf", gf=True), @@ -311,6 +304,12 @@ __dense_sld_lr_set = [ M("d_mul_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", ), M("d_m3w_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", cf=True), M("d_mgf_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", gf=True), + # max_conf + entropy sld + M("d_bin_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"] ), + M("d_bgf_sld_lr_c", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy"], gf=True), + M("d_mul_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"] ), + M("d_m3w_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), + M("d_mgf_sld_lr_c", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy"], gf=True), # sld all M("d_bin_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ), M("d_bgf_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True), @@ -331,12 +330,6 @@ __dense_sld_rbf_set = [ M("d_mul_sld_rbf", __sld_rbf(), "mul", d=True, ), M("d_m3w_sld_rbf", __sld_rbf(), "mul", d=True, cf=True), M("d_mgf_sld_rbf", __sld_rbf(), "mul", d=True, gf=True), - # max_conf + entropy sld - M("d_bin_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ), - M("d_bgf_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"], gf=True), - M("d_mul_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ), - M("d_m3w_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), - M("d_mgf_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], gf=True), # max_conf sld M("d_bin_sld_rbf_mc", __sld_rbf(), "bin", d=True, conf="max_conf", ), M("d_bgf_sld_rbf_mc", __sld_rbf(), "bin", d=True, conf="max_conf", gf=True), @@ -355,6 +348,12 @@ __dense_sld_rbf_set = [ M("d_mul_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", ), M("d_m3w_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", cf=True), M("d_mgf_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", gf=True), + # max_conf + entropy sld + M("d_bin_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ), + M("d_bgf_sld_rbf_c", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy"], gf=True), + M("d_mul_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ), + M("d_m3w_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), + M("d_mgf_sld_rbf_c", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy"], gf=True), # sld all M("d_bin_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ), M("d_bgf_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True), @@ -362,11 +361,11 @@ __dense_sld_rbf_set = [ M("d_m3w_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True), M("d_mgf_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True), # gs sld - G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", ), - G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", gf=True), - G("d_mul_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", ), - G("d_m3w_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", cf=True), - G("d_mgf_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="spider", gf=True), + G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="grid", ), + G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="grid", gf=True), + G("d_mul_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="grid", ), + G("d_m3w_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="grid", cf=True), + G("d_mgf_sld_rbf_gs", __sld_rbf(), "mul", d=True, pg="sld_rbf", search="grid", gf=True), ] __kde_lr_set = [ @@ -374,10 +373,6 @@ __kde_lr_set = [ M("bin_kde_lr", __kde_lr(), "bin" ), M("mul_kde_lr", __kde_lr(), "mul" ), M("m3w_kde_lr", __kde_lr(), "mul", cf=True), - # max_conf + entropy kde - M("bin_kde_lr_c", __kde_lr(), "bin", conf=["max_conf", "entropy"] ), - M("mul_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"] ), - M("m3w_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"], cf=True), # max_conf kde M("bin_kde_lr_mc", __kde_lr(), "bin", conf="max_conf", ), M("mul_kde_lr_mc", __kde_lr(), "mul", conf="max_conf", ), @@ -390,6 +385,10 @@ __kde_lr_set = [ M("bin_kde_lr_is", __kde_lr(), "bin", conf="isoft", ), M("mul_kde_lr_is", __kde_lr(), "mul", conf="isoft", ), M("m3w_kde_lr_is", __kde_lr(), "mul", conf="isoft", cf=True), + # max_conf + entropy kde + M("bin_kde_lr_c", __kde_lr(), "bin", conf=["max_conf", "entropy"] ), + M("mul_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"] ), + M("m3w_kde_lr_c", __kde_lr(), "mul", conf=["max_conf", "entropy"], cf=True), # kde all M("bin_kde_lr_a", __kde_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ), M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ), @@ -398,7 +397,6 @@ __kde_lr_set = [ G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ), G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ), G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True), - E("kde_lr_gs"), ] __dense_kde_lr_set = [ @@ -406,10 +404,6 @@ __dense_kde_lr_set = [ M("d_bin_kde_lr", __kde_lr(), "bin", d=True, ), M("d_mul_kde_lr", __kde_lr(), "mul", d=True, ), M("d_m3w_kde_lr", __kde_lr(), "mul", d=True, cf=True), - # max_conf + entropy kde - M("d_bin_kde_lr_c", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy"] ), - M("d_mul_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"] ), - M("d_m3w_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), # max_conf kde M("d_bin_kde_lr_mc", __kde_lr(), "bin", d=True, conf="max_conf", ), M("d_mul_kde_lr_mc", __kde_lr(), "mul", d=True, conf="max_conf", ), @@ -422,14 +416,18 @@ __dense_kde_lr_set = [ M("d_bin_kde_lr_is", __kde_lr(), "bin", d=True, conf="isoft", ), M("d_mul_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", ), M("d_m3w_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", cf=True), + # max_conf + entropy kde + M("d_bin_kde_lr_c", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy"] ), + M("d_mul_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"] ), + M("d_m3w_kde_lr_c", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), # kde all M("d_bin_kde_lr_a", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ), M("d_mul_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ), M("d_m3w_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True), # gs kde - G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="spider" ), - G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider" ), - G("d_m3w_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider", cf=True), + G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="grid" ), + G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="grid" ), + G("d_m3w_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="grid", cf=True), ] __dense_kde_rbf_set = [ @@ -437,10 +435,6 @@ __dense_kde_rbf_set = [ M("d_bin_kde_rbf", __kde_rbf(), "bin", d=True, ), M("d_mul_kde_rbf", __kde_rbf(), "mul", d=True, ), M("d_m3w_kde_rbf", __kde_rbf(), "mul", d=True, cf=True), - # max_conf + entropy kde - M("d_bin_kde_rbf_c", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ), - M("d_mul_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ), - M("d_m3w_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), # max_conf kde M("d_bin_kde_rbf_mc", __kde_rbf(), "bin", d=True, conf="max_conf", ), M("d_mul_kde_rbf_mc", __kde_rbf(), "mul", d=True, conf="max_conf", ), @@ -453,6 +447,10 @@ __dense_kde_rbf_set = [ M("d_bin_kde_rbf_is", __kde_rbf(), "bin", d=True, conf="isoft", ), M("d_mul_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", ), M("d_m3w_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", cf=True), + # max_conf + entropy kde + M("d_bin_kde_rbf_c", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy"] ), + M("d_mul_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"] ), + M("d_m3w_kde_rbf_c", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy"], cf=True), # kde all M("d_bin_kde_rbf_a", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ), M("d_mul_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ), @@ -468,10 +466,6 @@ __cc_lr_set = [ M("bin_cc_lr", __cc_lr(), "bin" ), M("mul_cc_lr", __cc_lr(), "mul" ), M("m3w_cc_lr", __cc_lr(), "mul", cf=True), - # max_conf + entropy cc - M("bin_cc_lr_c", __cc_lr(), "bin", conf=["max_conf", "entropy"] ), - M("mul_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"] ), - M("m3w_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True), # max_conf cc M("bin_cc_lr_mc", __cc_lr(), "bin", conf="max_conf", ), M("mul_cc_lr_mc", __cc_lr(), "mul", conf="max_conf", ), @@ -484,6 +478,10 @@ __cc_lr_set = [ M("bin_cc_lr_is", __cc_lr(), "bin", conf="isoft", ), M("mul_cc_lr_is", __cc_lr(), "mul", conf="isoft", ), M("m3w_cc_lr_is", __cc_lr(), "mul", conf="isoft", cf=True), + # max_conf + entropy cc + M("bin_cc_lr_c", __cc_lr(), "bin", conf=["max_conf", "entropy"] ), + M("mul_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"] ), + M("m3w_cc_lr_c", __cc_lr(), "mul", conf=["max_conf", "entropy"], cf=True), # cc all M("bin_cc_lr_a", __cc_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ), M("mul_cc_lr_a", __cc_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ), @@ -492,7 +490,13 @@ __cc_lr_set = [ G("bin_cc_lr_gs", __cc_lr(), "bin", pg="cc_lr", search="grid" ), G("mul_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid" ), G("m3w_cc_lr_gs", __cc_lr(), "mul", pg="cc_lr", search="grid", cf=True), +] + +__ms_set = [ + E("sld_lr_gs"), + E("kde_lr_gs"), E("cc_lr_gs"), + E("QuAcc"), ] # fmt: on @@ -505,7 +509,7 @@ __methods_set = ( + __dense_kde_lr_set + __dense_kde_rbf_set + __cc_lr_set - + [E("QuAcc")] + + __ms_set ) _methods = {m.name: m for m in __methods_set} diff --git a/quacc/evaluation/report.py b/quacc/evaluation/report.py index 20df414..f078296 100644 --- a/quacc/evaluation/report.py +++ b/quacc/evaluation/report.py @@ -159,10 +159,10 @@ class CompReport: if estimators is not None and name not in estimators: continue - if len(np.where(np.in1d(methods, self._data.columns.unique(1)))[0]) != len( - methods - ): + available_idx = np.where(np.in1d(methods, self._data.columns.unique(1)))[0] + if len(available_idx) == 0: continue + methods = np.array(methods)[available_idx] _metric = _get_metric(metric) m_data = _data.loc[:, (_metric, methods)] diff --git a/quacc/plot/plotly.py b/quacc/plot/plotly.py index 9a62f22..900be13 100644 --- a/quacc/plot/plotly.py +++ b/quacc/plot/plotly.py @@ -5,7 +5,7 @@ import numpy as np import plotly import plotly.graph_objects as go -from quacc.evaluation.estimators import _renames +from quacc.evaluation.estimators import CE, _renames from quacc.plot.base import BasePlot @@ -19,7 +19,7 @@ class PlotCfg: web_cfg = PlotCfg("lines+markers", 2) -png_cfg = PlotCfg( +png_cfg_old = PlotCfg( "lines", 5, legend=dict( @@ -33,6 +33,18 @@ png_cfg = PlotCfg( font=dict(size=24), # template="ggplot2", ) +png_cfg = PlotCfg( + "lines", + 5, + legend=dict( + font=dict( + family="DejaVu Sans", + size=24, + ), + ), + font=dict(size=24), + # template="ggplot2", +) _cfg = png_cfg @@ -122,16 +134,21 @@ class PlotlyPlot(BasePlot): if isinstance(base_prevs[0], float): base_prevs = np.around([(1 - bp, bp) for bp in base_prevs], decimals=4) x = [str(tuple(bp)) for bp in base_prevs] - columns = self.rename_plots(columns) + named_data = {c: d for c, d in zip(columns, data)} + r_columns = {c: r for c, r in zip(columns, self.rename_plots(columns))} line_colors = self.get_colors(len(columns)) - for name, delta in zip(columns, data): + # for name, delta in zip(columns, data): + columns = np.array(CE.name.sort(columns)) + for name in columns: + delta = named_data[name] + r_name = r_columns[name] color = next(line_colors) _line = [ go.Scatter( x=x, y=delta, mode=_cfg.mode, - name=name, + name=r_name, line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth), hovertemplate="prev.: %{x}
error: %{y:,.4f}", ) @@ -171,13 +188,17 @@ class PlotlyPlot(BasePlot): ) -> go.Figure: fig = go.Figure() x = reference - columns = self.rename_plots(columns) line_colors = self.get_colors(len(columns)) _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)])) _lims = np.array([[_edges[0], _edges[1]], [_edges[0], _edges[1]]]) - for name, val in zip(columns, data): + named_data = {c: d for c, d in zip(columns, data)} + r_columns = {c: r for c, r in zip(columns, self.rename_plots(columns))} + columns = np.array(CE.name.sort(columns)) + for name in columns: + val = named_data[name] + r_name = r_columns[name] color = next(line_colors) slope, interc = np.polyfit(x, val, 1) y_lr = np.array([slope * _x + interc for _x in _lims[0]]) @@ -188,7 +209,7 @@ class PlotlyPlot(BasePlot): y=val, customdata=np.stack((val - x,), axis=-1), mode="markers", - name=name, + name=r_name, line=dict(color=self.hex_to_rgb(color, t=0.5)), hovertemplate="true acc: %{x:,.4f}
estim. acc: %{y:,.4f}
acc err.: %{customdata[0]:,.4f}", ), @@ -233,9 +254,13 @@ class PlotlyPlot(BasePlot): fig = go.Figure() # x = shift_prevs[:, pos_class] x = shift_prevs - columns = self.rename_plots(columns) line_colors = self.get_colors(len(columns)) - for name, delta in zip(columns, data): + named_data = {c: d for c, d in zip(columns, data)} + r_columns = {c: r for c, r in zip(columns, self.rename_plots(columns))} + columns = np.array(CE.name.sort(columns)) + for name in columns: + delta = named_data[name] + r_name = r_columns[name] col_idx = (columns == name).nonzero()[0][0] color = next(line_colors) fig.add_trace( @@ -244,7 +269,7 @@ class PlotlyPlot(BasePlot): y=delta, customdata=np.stack((counts[col_idx],), axis=-1), mode=_cfg.mode, - name=name, + name=r_name, line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth), hovertemplate="shift: %{x}
error: %{y}" + "
count: %{customdata[0]}" diff --git a/test_imdb_max_shift.py b/test_imdb_max_shift.py new file mode 100644 index 0000000..e3a59de --- /dev/null +++ b/test_imdb_max_shift.py @@ -0,0 +1,15 @@ +from quacc.evaluation.report import DatasetReport +import pandas as pd + +dr = DatasetReport.unpickle("output/main/imdb/imdb.pickle") + +_data = dr.data( + metric="acc", estimators=["bin_sld_lr_mc", "bin_sld_lr_ne", "bin_sld_lr_c"] +) +d1 = _data.loc[((0.9, 0.1), (1.0, 0.0), slice(None)), :] +d2 = _data.loc[((0.1, 0.9), (0.0, 1.0), slice(None)), :] +dd = pd.concat([d1, d2], axis=0) + +print(d1.to_numpy(), "\n", d1.mean(), "\n") +print(d2.to_numpy(), "\n", d2.mean(), "\n") +print(dd.to_numpy(), "\n", dd.mean(), "\n") From 4c6a0342ffa83bb783b71c3a11f3d563823ea31c Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Thu, 7 Mar 2024 19:33:32 +0100 Subject: [PATCH 3/3] tesi updated --- TODO.md | 3 +++ quacc/evaluation/estimators.py | 9 ++++--- quacc/evaluation/method.py | 2 +- quacc/evaluation/report.py | 20 ++++++++++++++ quacc/plot/base.py | 1 + quacc/plot/plot.py | 2 ++ quacc/plot/plotly.py | 48 +++++++++++++++++++++++++--------- 7 files changed, 68 insertions(+), 17 deletions(-) diff --git a/TODO.md b/TODO.md index 154ff00..d559f56 100644 --- a/TODO.md +++ b/TODO.md @@ -61,3 +61,6 @@ multiclass: - [x] aggiungere supporto a multiclass in dataset.py - [x] aggiungere group_false in ExtensionPolicy - [ ] modificare BQAE in modo che i quantifier si adattino alla casistica(binary/multi in base a group_false) + +fix: +- [ ] make quantifiers predict 0 prevalence for classes for which we have 0 samples diff --git a/quacc/evaluation/estimators.py b/quacc/evaluation/estimators.py index d6dd81e..476584b 100644 --- a/quacc/evaluation/estimators.py +++ b/quacc/evaluation/estimators.py @@ -88,7 +88,8 @@ _renames = { "d_bin_sld_rbf": "(2x2)_SLD_RBF", "d_mul_sld_rbf": "(1x4)_SLD_RBF", "d_m3w_sld_rbf": "(1x3)_SLD_RBF", - "sld_lr_gs": "MS_SLD_LR", + # "sld_lr_gs": "MS_SLD_LR", + "sld_lr_gs": "QuAcc(SLD)", "bin_kde_lr": "(2x2)_KDEy_LR", "mul_kde_lr": "(1x4)_KDEy_LR", "m3w_kde_lr": "(1x3)_KDEy_LR", @@ -98,8 +99,10 @@ _renames = { "bin_cc_lr": "(2x2)_CC_LR", "mul_cc_lr": "(1x4)_CC_LR", "m3w_cc_lr": "(1x3)_CC_LR", - "kde_lr_gs": "MS_KDEy_LR", - "cc_lr_gs": "MS_CC_LR", + # "kde_lr_gs": "MS_KDEy_LR", + "kde_lr_gs": "QuAcc(KDEy)", + # "cc_lr_gs": "MS_CC_LR", + "cc_lr_gs": "QuAcc(CC)", "atc_mc": "ATC", "doc": "DoC", "mandoline": "Mandoline", diff --git a/quacc/evaluation/method.py b/quacc/evaluation/method.py index 79326be..3b0af41 100644 --- a/quacc/evaluation/method.py +++ b/quacc/evaluation/method.py @@ -493,9 +493,9 @@ __cc_lr_set = [ ] __ms_set = [ + E("cc_lr_gs"), E("sld_lr_gs"), E("kde_lr_gs"), - E("cc_lr_gs"), E("QuAcc"), ] diff --git a/quacc/evaluation/report.py b/quacc/evaluation/report.py index f078296..a6ba71b 100644 --- a/quacc/evaluation/report.py +++ b/quacc/evaluation/report.py @@ -447,6 +447,7 @@ class DatasetReport: "delta_test", "stdev_test", "test_table", + "diagonal", "stats_table", "fit_scores", ] @@ -745,6 +746,25 @@ class DatasetReport: base_path=base_path, backend=backend, ) + elif mode == "diagonal": + f_data = self.data(metric=metric + "_score", estimators=estimators) + if f_data.empty: + return None + + ref: pd.Series = f_data.loc[:, "ref"] + f_data.drop(columns=["ref"], inplace=True) + return plot.plot_diagonal( + reference=ref.to_numpy(), + columns=f_data.columns.to_numpy(), + data=f_data.T.to_numpy(), + metric=metric, + name=conf, + # train_prev=self.train_prev, + fixed_lim=True, + save_fig=save_fig, + base_path=base_path, + backend=backend, + ) def to_md( self, diff --git a/quacc/plot/base.py b/quacc/plot/base.py index 04e9e8c..a44b219 100644 --- a/quacc/plot/base.py +++ b/quacc/plot/base.py @@ -17,6 +17,7 @@ class BasePlot: title="default", x_label="true", y_label="estim.", + fixed_lim=False, legend=True, ): ... diff --git a/quacc/plot/plot.py b/quacc/plot/plot.py index fa7c082..3eebf1e 100644 --- a/quacc/plot/plot.py +++ b/quacc/plot/plot.py @@ -77,6 +77,7 @@ def plot_diagonal( metric="acc", name="default", train_prev=None, + fixed_lim=False, legend=True, save_fig=False, base_path=None, @@ -103,6 +104,7 @@ def plot_diagonal( title=title, x_label=x_label, y_label=y_label, + fixed_lim=fixed_lim, legend=legend, ) diff --git a/quacc/plot/plotly.py b/quacc/plot/plotly.py index 900be13..52a514d 100644 --- a/quacc/plot/plotly.py +++ b/quacc/plot/plotly.py @@ -184,14 +184,21 @@ class PlotlyPlot(BasePlot): title="default", x_label="true", y_label="estim.", + fixed_lim=False, legend=True, ) -> go.Figure: fig = go.Figure() x = reference line_colors = self.get_colors(len(columns)) - _edges = (np.min([np.min(x), np.min(data)]), np.max([np.max(x), np.max(data)])) - _lims = np.array([[_edges[0], _edges[1]], [_edges[0], _edges[1]]]) + if fixed_lim: + _lims = np.array([[0.0, 1.0], [0.0, 1.0]]) + else: + _edges = ( + np.min([np.min(x), np.min(data)]), + np.max([np.max(x), np.max(data)]), + ) + _lims = np.array([[_edges[0], _edges[1]], [_edges[0], _edges[1]]]) named_data = {c: d for c, d in zip(columns, data)} r_columns = {c: r for c, r in zip(columns, self.rename_plots(columns))} @@ -201,7 +208,7 @@ class PlotlyPlot(BasePlot): r_name = r_columns[name] color = next(line_colors) slope, interc = np.polyfit(x, val, 1) - y_lr = np.array([slope * _x + interc for _x in _lims[0]]) + # y_lr = np.array([slope * _x + interc for _x in _lims[0]]) fig.add_traces( [ go.Scatter( @@ -210,17 +217,25 @@ class PlotlyPlot(BasePlot): customdata=np.stack((val - x,), axis=-1), mode="markers", name=r_name, - line=dict(color=self.hex_to_rgb(color, t=0.5)), + marker=dict(color=self.hex_to_rgb(color, t=0.5)), hovertemplate="true acc: %{x:,.4f}
estim. acc: %{y:,.4f}
acc err.: %{customdata[0]:,.4f}", + # showlegend=False, ), - go.Scatter( - x=_lims[0], - y=y_lr, - mode="lines", - name=name, - line=dict(color=self.hex_to_rgb(color), width=3), - showlegend=False, - ), + # go.Scatter( + # x=[x[-1]], + # y=[val[-1]], + # mode="markers", + # marker=dict(color=self.hex_to_rgb(color), size=8), + # name=r_name, + # ), + # go.Scatter( + # x=_lims[0], + # y=y_lr, + # mode="lines", + # name=name, + # line=dict(color=self.hex_to_rgb(color), width=3), + # showlegend=False, + # ), ] ) fig.add_trace( @@ -235,7 +250,14 @@ class PlotlyPlot(BasePlot): ) self.update_layout(fig, title, x_label, y_label) - fig.update_layout(yaxis_scaleanchor="x", yaxis_scaleratio=1.0) + fig.update_layout( + autosize=False, + width=1300, + height=1000, + yaxis_scaleanchor="x", + yaxis_scaleratio=1.0, + yaxis_range=[-0.1, 1.1], + ) return fig def plot_shift(