update
This commit is contained in:
parent 6bf2fb9e1b
commit 2d8d4c3c68
@@ -0,0 +1,90 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from method.kdey import KDEyML, KDEyCS, KDEyHD
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy

datasets = qp.datasets.UCI_DATASETS

# target = 'f1'
target = 'acc'

errors = []

# dataset_name = datasets[-2]
for dataset_name in datasets:
    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
        continue
    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    train, val = train.split_stratified(random_state=0)

    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    for sample in APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')():
        print('='*80)
        y_hat = cls.predict(sample.instances)
        y = sample.labels
        if target == 'acc':
            acc = (y_hat==y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        q = EMQ(cls)
        q.fit(train, fit_classifier=False)

        # q = EMQ(cls)
        # q.fit(train, val_split=val, fit_classifier=False)
        M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        tp = cont_table_hat[1,1]
        tn = cont_table_hat[0,0]
        fn = cont_table_hat[0,1]
        fp = cont_table_hat[1,0]

        if target == 'acc':
            acc_hat = (tp+tn)
        else:
            den = (2*tp + fn + fp)
            if den > 0:
                acc_hat = 2*tp / den
            else:
                acc_hat = 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
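A side note on the estimate at the core of this script (an illustrative sketch with made-up numbers, not part of the commit): `cont_table_hat = p_hat * M_hat` broadcasts the estimated class prevalences over the misclassification-rate matrix, giving a joint contingency table whose cells sum to 1, from which accuracy is read off as tp + tn. The row/column convention below is inferred from the fn/fp indexing in the script.

import numpy as np

p_hat = np.asarray([0.3, 0.7])       # estimated test prevalences (neg, pos)
M_hat = np.asarray([[0.9, 0.2],      # P(pred=0 | true=0), P(pred=0 | true=1)
                    [0.1, 0.8]])     # P(pred=1 | true=0), P(pred=1 | true=1)

cont = p_hat * M_hat                 # cont[i, j] approximates P(pred=i, true=j)
tn, fn, fp, tp = cont[0, 0], cont[0, 1], cont[1, 0], cont[1, 1]
print(cont.sum())                    # -> 1.0
print(tp + tn)                       # estimated accuracy: 0.56 + 0.27 = 0.83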
@@ -0,0 +1,269 @@
import numpy as np
import scipy.special
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
from sklearn import clone
import quapy.functional as F

# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']

# target = 'f1'
target = 'acc'

errors = []

def method_1(cls, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # q = EMQ(LogisticRegression(class_weight='balanced'))
    # q.fit(val, fit_classifier=True)
    q = EMQ(cls)
    q.fit(train, fit_classifier=False)

    # q = KDEyML(cls)
    # q.fit(train, val_split=val, fit_classifier=False)
    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    p_hat = q.quantify(sample.instances)
    cont_table_hat = p_hat * M_hat
    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
    # cont_table_hat = cont_table_hat / cont_table_hat.sum()

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)
    print('cont_table Sum :\n', cont_table_hat.sum())

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp


def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Assume P and Q are the training and test distributions.
    Solves the following system of linear equations:
    tp + fp = CC (the classify & count estimate, observed)
    fn + tp = Q(Y=1) (this is not observed but is estimated via quantification)
    tp + fp + fn + tn = 1 (trivial)

    There are 4 unknowns and 3 equations. The fourth required one is established
    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we consider that the tpr in P (estimated via validation, hereafter tpr)
    and in Q (unknown, hereafter tpr_Q) should be the same. This means:
    tpr = tpr_Q = tp / (tp + fn)
    after some manipulation:
    tp (tpr-1) + fn (tpr) = 0 <-- our last equation

    Note that the last equation relies on the estimate tpr. It is likely that, the more
    positives we have, the more reliable this estimate is. This suggests that, in cases
    in which we have more negatives in the validation set than positives, it might be
    convenient to resort to the true negative rate (tnr) instead. This gives rise to
    the alternative fourth equation:
    tn (tnr-1) + fp (tnr) = 0

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]
    # pos_prev = sample.prevalence()[1]

    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    cont_table_true = sample.prevalence() * M_true

    if val.prevalence()[1] > 0.5:

        # in this case, the tpr might be a more reliable estimate than tnr
        tpr_hat = M_hat[1, 1]

        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])

    else:

        # in this case, the tnr might be a more reliable estimate than tpr
        tnr_hat = M_hat[0, 0]

        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [cc_prev, pos_prev, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    # if (cont_table_estim < 0).any() or (cont_table_estim > 1).any():
    #     cont_table_estim = scipy.special.softmax(cont_table_estim)

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)
    # print('true_tpr', M_true[1,1])
    # print('estim_tpr', tpr_hat)

    return tn, fn, fp, tp


def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method 2, but without involving any of quapy's quantifiers.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
    tpr_hat = M_hat[1,1]
    fpr_hat = M_hat[1,0]
    tnr_hat = M_hat[0,0]
    pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
    pos_prev_val = val.prevalence()[1]

    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [pos_prev_cc, pos_prev_test_hat, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp


def cls_eval_from_counters(tn, fn, fp, tp):
    if target == 'acc':
        acc_hat = (tp + tn)
    else:
        den = (2 * tp + fn + fp)
        if den > 0:
            acc_hat = 2 * tp / den
        else:
            acc_hat = 0
    return acc_hat


def cls_eval_from_labels(y, y_hat):
    if target == 'acc':
        acc = (y_hat == y).mean()
    else:
        acc = f1_score(y, y_hat, zero_division=0)
    return acc


for dataset_name in datasets:

    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
    for train in train_prot():
        if np.product(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced')

        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        for sample in APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')():
            print('='*80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels
            acc_true = cls_eval_from_labels(y, y_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)

            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
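To make the system described in method_2's docstring concrete, here is a worked instance with invented numbers (the tpr, the classify-and-count estimate, and the quantified positive prevalence are all assumed); the unknowns are ordered (tn, fn, fp, tp) exactly as in the code:

import numpy as np

tpr_hat = 0.8     # assumed tpr estimated on validation
cc_prev = 0.50    # classify & count estimate: fp + tp
pos_prev = 0.55   # quantified Q(Y=1): fn + tp

A = np.asarray([
    [0, 0, 1, 1],                  # fp + tp = cc_prev
    [0, 1, 0, 1],                  # fn + tp = pos_prev
    [1, 1, 1, 1],                  # all four cells sum to 1
    [0, tpr_hat, 0, tpr_hat - 1],  # fn*tpr + tp*(tpr - 1) = 0, i.e. tp/(tp+fn) = tpr
])
b = np.asarray([cc_prev, pos_prev, 1, 0])

tn, fn, fp, tp = np.linalg.solve(A, b)
print(tn, fn, fp, tp)  # -> 0.39 0.11 0.06 0.44, and indeed 0.44/(0.44+0.11) = 0.8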
conf.yaml
@@ -5,47 +5,80 @@ debug_conf: &debug_conf
  OUT_DIR_NAME: output/debug
  DATASET_N_PREVS: 9
  COMP_ESTIMATORS:
    - bin_sld_lr
    - mul_sld_lr
    - m3w_sld_lr
    - d_bin_sld_lr
    - d_mul_sld_lr
    - d_m3w_sld_lr
    - d_bin_sld_rbf
    - d_mul_sld_rbf
    - d_m3w_sld_rbf
    - bin_kde_lr
    - mul_kde_lr
    - m3w_kde_lr
    - d_bin_kde_lr
    - d_mul_kde_lr
    - d_m3w_kde_lr
    - d_bin_kde_rbf
    - d_mul_kde_rbf
    - d_m3w_kde_rbf
    # - bin_sld_lr
    # - mul_sld_lr
    # - m3w_sld_lr
    # - d_bin_sld_lr
    # - d_mul_sld_lr
    # - d_m3w_sld_lr
    # - d_bin_sld_rbf
    # - d_mul_sld_rbf
    # - d_m3w_sld_rbf
    # - bin_kde_lr
    # - mul_kde_lr
    # - m3w_kde_lr
    # - d_bin_kde_lr
    # - d_mul_kde_lr
    # - d_m3w_kde_lr
    # - d_bin_kde_rbf
    # - d_mul_kde_rbf
    # - d_m3w_kde_rbf
    # - mandoline
    # - rca
    - bin_sld_lr_is
    - mul_sld_lr_is
    - m3w_sld_lr_is
    - rca
    - rca_star
    - doc
    - atc_mc
  N_JOBS: -2

  confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: twitter_gasp
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

test_conf: &test_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/test
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - cross
      - cross2
      - bin_sld_lr
      - mul_sld_lr
      - m3w_sld_lr
      - bin_sld_lr_is
      - mul_sld_lr_is
      - m3w_sld_lr_is
      - doc
      - atc_mc
    N_JOBS: -2

  confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: twitter_gasp

main:
  confs: &main_confs
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT

sld_lr_conf: &sld_lr_conf
@@ -72,6 +105,9 @@ sld_lr_conf: &sld_lr_conf
      - bin_sld_lr_is
      - mul_sld_lr_is
      - m3w_sld_lr_is
      - bin_sld_lr_a
      - mul_sld_lr_a
      - m3w_sld_lr_a
      - bin_sld_lr_gs
      - mul_sld_lr_gs
      - m3w_sld_lr_gs
@@ -116,6 +152,9 @@ d_sld_lr_conf: &d_sld_lr_conf
      - d_bin_sld_lr_is
      - d_mul_sld_lr_is
      - d_m3w_sld_lr_is
      - d_bin_sld_lr_a
      - d_mul_sld_lr_a
      - d_m3w_sld_lr_a
      - d_bin_sld_lr_gs
      - d_mul_sld_lr_gs
      - d_m3w_sld_lr_gs
@@ -160,6 +199,9 @@ d_sld_rbf_conf: &d_sld_rbf_conf
      - d_bin_sld_rbf_is
      - d_mul_sld_rbf_is
      - d_m3w_sld_rbf_is
      - d_bin_sld_rbf_a
      - d_mul_sld_rbf_a
      - d_m3w_sld_rbf_a
      - d_bin_sld_rbf_gs
      - d_mul_sld_rbf_gs
      - d_m3w_sld_rbf_gs
@@ -202,6 +244,9 @@ kde_lr_conf: &kde_lr_conf
      - bin_kde_lr_is
      - mul_kde_lr_is
      - m3w_kde_lr_is
      - bin_kde_lr_a
      - mul_kde_lr_a
      - m3w_kde_lr_a
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
@@ -238,6 +283,9 @@ d_kde_lr_conf: &d_kde_lr_conf
      - d_bin_kde_lr_is
      - d_mul_kde_lr_is
      - d_m3w_kde_lr_is
      - d_bin_kde_lr_a
      - d_mul_kde_lr_a
      - d_m3w_kde_lr_a
      - d_bin_kde_lr_gs
      - d_mul_kde_lr_gs
      - d_m3w_kde_lr_gs
@@ -274,6 +322,9 @@ d_kde_rbf_conf: &d_kde_rbf_conf
      - d_bin_kde_rbf_is
      - d_mul_kde_rbf_is
      - d_m3w_kde_rbf_is
      - d_bin_kde_rbf_a
      - d_mul_kde_rbf_a
      - d_m3w_kde_rbf_a
      - d_bin_kde_rbf_gs
      - d_mul_kde_rbf_gs
      - d_m3w_kde_rbf_gs
@@ -287,5 +338,72 @@ d_kde_rbf_conf: &d_kde_rbf_conf
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

baselines_conf: &baselines_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/baselines
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - doc
      - atc_mc
      - mandoline
      - rca
      - rca_star
    N_JOBS: -2

exec: *d_sld_rbf_conf
confs: *main_confs
other_confs:
  - DATASET_NAME: imdb
  - DATASET_NAME: rcv1
    DATASET_TARGET: CCAT

kde_lr_gs_conf: &kde_lr_gs_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/kde_lr_gs
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
    N_JOBS: -2

  confs: *main_confs

timing_conf: &timing_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/timing
    DATASET_N_PREVS: 1
    COMP_ESTIMATORS:
      - bin_sld_lr_a
      - mul_sld_lr_a
      - m3w_sld_lr_a
      - bin_kde_lr_a
      - mul_kde_lr_a
      - m3w_kde_lr_a
      - bin_sld_lr_gs
      - mul_sld_lr_gs
      - m3w_sld_lr_gs
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
      - doc
      - atc_mc
      - rca
      - rca_star
      - mandoline
    N_JOBS: 1
    PROTOCOL_N_PREVS: 1,
    PROTOCOL_REPEATS: 1,
    SAMPLE_SIZE: 1000,

  confs: *main_confs

exec: *kde_lr_gs_conf
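The config above relies heavily on YAML anchors (&name) and aliases (*name), e.g. `exec: *kde_lr_gs_conf`. A minimal sketch of how they resolve, assuming the project loads the file with PyYAML (the loading code itself is illustrative, not taken from the repo):

import yaml

# An alias node (*kde_lr_gs_conf) is replaced at load time by the mapping
# defined under the anchor (&kde_lr_gs_conf), so conf["exec"] is a full dict.
with open("conf.yaml") as f:
    conf = yaml.safe_load(f)

print(conf["exec"]["global"]["OUT_DIR_NAME"])  # -> output/kde_lr_gs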
@@ -0,0 +1,11 @@
#!/bin/bash

CMD="cp"
DEST="~/tesi_docker/"

bash -c "${CMD} -r quacc ${DEST}"
bash -c "${CMD} -r baselines ${DEST}"
bash -c "${CMD} run.py ${DEST}"
bash -c "${CMD} remote.py ${DEST}"
bash -c "${CMD} conf.yaml ${DEST}"
bash -c "${CMD} requirements.txt ${DEST}"
@@ -0,0 +1,8 @@
#!/bin/bash

if [[ "${1}" == "r" ]]; then
    scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
    ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
else
    tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
fi
File diff suppressed because it is too large
@@ -13,6 +13,7 @@ jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"
abstention = "^0.1.3.1"
pytest = "^8.0.0"

[tool.poetry.scripts]
main = "quacc.main:main"
@@ -34,21 +35,20 @@ dash = "gunicorn qcdash.app:server -b ilona.isti.cnr.it:33421"
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 0 -f /home/volpi/tesi/quacc.log >> ~/tesi/remote.log
"""

[tool.poe.tasks.logrf]
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat --paging=never --language log
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
"""
[tool.poe.tasks.logf]
shell = """
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
"""

interpreter = "fish"
env = { HOST = "volpi@ilona.isti.cnr.it" }

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"
@@ -85,6 +85,8 @@ def get_table(dr: DatasetReport, metric, estimators, view, mode):
        case ("avg", "train_table"):
            # return dr.data(metric=metric, estimators=estimators).groupby(level=1).mean()
            return dr.train_table(metric=metric, estimators=estimators)
        case ("avg", "train_std_table"):
            return dr.train_std_table(metric=metric, estimators=estimators)
        case ("avg", "test_table"):
            # return dr.data(metric=metric, estimators=estimators).groupby(level=0).mean()
            return dr.test_table(metric=metric, estimators=estimators)
@@ -121,24 +123,44 @@ def get_DataTable(df, mode):

    _index_name = dict(
        train_table="test prev.",
        train_std_table="train prev.",
        test_table="train prev.",
        shift_table="shift",
        stats_table="method",
    )
    df = df.reset_index()

    if mode == "train_std_table":
        columns_format = Format()
        df_columns = np.concatenate([["index"], df.columns.unique(1)[1:]])
        data = [
            dict(
                index="(" + ", ".join([f"{v:.2f}" for v in idx]) + ")"
                if isinstance(idx, tuple | list | np.ndarray)
                else str(idx)
            )
            | {
                k: f"{df.loc[i,('avg',k)]:.4f}~{df.loc[i,('std',k)]:.3f}"
                for k in df.columns.unique(1)[1:]
            }
            for i, idx in zip(df.index, df.loc[:, ("index", "")])
        ]
    else:
        columns_format = Format(precision=6, scheme=Scheme.exponent, nully="nan")
        df_columns = df.columns
        data = df.to_dict("records")

    columns = {
        c: dict(
            id=c,
            name=_index_name[mode] if c == "index" else c,
            type="numeric",
            format=Format(precision=6, scheme=Scheme.exponent, nully="nan"),
            format=columns_format,
        )
        for c in df.columns
        for c in df_columns
    }
    # columns["index"]["format"] = Format(precision=2, scheme=Scheme.fixed)
    columns["index"]["format"] = Format()
    columns = list(columns.values())
    data = df.to_dict("records")
    for d in data:
        if isinstance(d["index"], tuple | list | np.ndarray):
            d["index"] = "(" + ", ".join([f"{v:.2f}" for v in d["index"]]) + ")"
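The train_std_table branch above renders each cell as an "avg~std" pair; a one-line sketch of that formatting (values invented):

avg, std = 0.1234567, 0.0456789
print(f"{avg:.4f}~{std:.3f}")  # -> 0.1235~0.046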
@@ -320,25 +320,59 @@ def rcv1_info():
    n_train = 23149

    targets = []
    for target in range(103):
        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
    for target in ["CCAT", "MCAT", "GCAT"]:
        target_index = np.where(dataset.target_names == target)[0]
        train_t_prev = np.average(
            dataset.target[:n_train, target_index].toarray().flatten()
        )
        test_t_prev = np.average(
            dataset.target[n_train:, target_index].toarray().flatten()
        )
        d = Dataset(name="rcv1", target=target)()[0]
        targets.append(
            (
                dataset.target_names[target],
                target,
                {
                    "train": (1.0 - train_t_prev, train_t_prev),
                    "test": (1.0 - test_t_prev, test_t_prev),
                    "train_size": len(d.train),
                    "val_size": len(d.validation),
                    "test_size": len(d.test),
                },
            )
        )

    targets.sort(key=lambda t: t[1]["train"][1])
    for n, d in targets:
        print(f"{n}:")
        for k, (fp, tp) in d.items():
            print(f"\t{k}: {fp:.4f}, {tp:.4f}")
        for k, v in d.items():
            if isinstance(v, tuple):
                print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
            else:
                print(f"\t{k}: {v}")


def imdb_info():
    train, test = qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test

    train_t_prev = train.prevalence()
    test_t_prev = test.prevalence()
    dst = Dataset(name="imdb")()[0]
    d = {
        "train": (train_t_prev[0], train_t_prev[1]),
        "test": (test_t_prev[0], test_t_prev[1]),
        "train_size": len(dst.train),
        "val_size": len(dst.validation),
        "test_size": len(dst.test),
    }

    print("imdb:")
    for k, v in d.items():
        if isinstance(v, tuple):
            print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
        else:
            print(f"\t{k}: {v}")


if __name__ == "__main__":
    fetch_cifar100()
    rcv1_info()
    imdb_info()
@@ -0,0 +1,115 @@
from functools import wraps

import numpy as np
import quapy.functional as F
import sklearn.metrics as metrics
from quapy.method.aggregative import ACC, EMQ
from sklearn import clone
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport

_alts = {}


def alt(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    wrapper.name = func.__name__
    _alts[func.__name__] = wrapper

    return wrapper


@alt
def cross(c_model, validation, protocol):
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)

    qcls = clone(c_model)
    qcls.fit(*validation.Xy)

    er = EvaluationReport(name="cross")
    for sample in protocol():
        y_hat = c_model.predict(sample.instances)
        y = sample.labels
        ground_acc = (y_hat == y).mean()
        ground_f1 = metrics.f1_score(y, y_hat, zero_division=0)

        q = EMQ(qcls)
        q.fit(validation, fit_classifier=False)

        M_hat = ACC.getPteCondEstim(validation.classes_, y_val, y_hat_val)
        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er


@alt
def cross2(c_model, validation, protocol):
    classes = validation.classes_
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    pos_prev_val = validation.prevalence()[1]

    er = EvaluationReport(name="cross2")
    for sample in protocol():
        y_test = sample.labels
        y_hat_test = c_model.predict(sample.instances)
        ground_acc = (y_hat_test == y_test).mean()
        ground_f1 = metrics.f1_score(y_test, y_hat_test, zero_division=0)
        pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
        tpr_hat = M_hat[1, 1]
        fpr_hat = M_hat[1, 0]
        tnr_hat = M_hat[0, 0]
        pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
        pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)

        if pos_prev_val > 0.5:
            # in this case, the tpr might be a more reliable estimate than tnr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [0, tpr_hat, 0, tpr_hat - 1]]
            )
        else:
            # in this case, the tnr might be a more reliable estimate than tpr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [tnr_hat - 1, 0, tnr_hat, 0]]
            )

        b = np.asarray([pos_prev_cc, pos_prev_test_hat, 1, 0])

        tn, fn, fp, tp = np.linalg.solve(A, b)
        cont_table_hat = np.array([[tn, fp], [fn, tp]])

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er
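The @alt decorator above implements a small registry pattern: each decorated function is stored in _alts under its own name, which is what lets CompEstimator later merge `alt._alts | method._methods | baseline._baselines` into a single lookup table. A self-contained sketch of the same idea (the names here are hypothetical, not repo code):

_registry = {}

def register(func):
    # store the function under its own name so callers can look it up later
    _registry[func.__name__] = func
    return func

@register
def cross_demo():
    return "ran"

print(_registry["cross_demo"]())  # -> ran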
@@ -288,21 +288,76 @@ def rca(
):
    """elsahar19"""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)
    f1_average = "binary" if validation.n_classes == 2 else "macro"
    val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED)
    val1_pred1 = c_model_predict(val1.X)

    val2_protocol = APP(
        val2,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    val2_prot_preds = []
    val2_rca = []
    val2_prot_preds = []
    val2_prot_y = []
    for v2 in val2_protocol():
        _preds = c_model_predict(v2.X)
        try:
            c_model2 = clone_fit(c_model, v2.X, _preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val1_pred2 = c_model2_predict(val1.X)
            rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
            val2_rca.append(rca_score)
            val2_prot_preds.append(_preds)
            val2_prot_y.append(v2.y)
        except ValueError:
            pass

    val_targets_acc = np.array(
        [
            metrics.accuracy_score(v2_y, v2_preds)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
    val_targets_f1 = np.array(
        [
            metrics.f1_score(v2_y, v2_preds, average=f1_average)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_pred)
            test_preds = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
            val1_pred2 = c_model2_predict(val1.X)
            rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
            acc_score = reg_acc.predict(np.array([[rca_score]]))[0]
            f1_score = reg_f1.predict(np.array([[rca_score]]))[0]
            meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
            meta_f1 = abs(
                f1_score - metrics.f1_score(test.y, test_preds, average=f1_average)
            )
            report.append_row(
                test.prevalence(),
                acc=meta_acc,
                acc_score=acc_score,
                f1=meta_f1,
                f1_score=f1_score,
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
                test.prevalence(),
                acc=np.nan,
                acc_score=np.nan,
                f1=np.nan,
                f1_score=np.nan,
            )

    return report
@@ -317,13 +372,56 @@ def rca_star(
):
    """elsahar19"""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
    f1_average = "binary" if validation.n_classes == 2 else "macro"
    validation1, val2 = validation.split_stratified(
        train_prop=0.5, random_state=env._R_SEED
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = clone_fit(c_model, validation1.X, val1_pred)
    val11, val12 = validation1.split_stratified(
        train_prop=0.5, random_state=env._R_SEED
    )

    val11_pred = c_model_predict(val11.X)
    c_model1 = clone_fit(c_model, val11.X, val11_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)
    val12_pred1 = c_model1_predict(val12.X)

    val2_protocol = APP(
        val2,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    val2_prot_preds = []
    val2_rca = []
    val2_prot_preds = []
    val2_prot_y = []
    for v2 in val2_protocol():
        _preds = c_model_predict(v2.X)
        try:
            c_model2 = clone_fit(c_model, v2.X, _preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val12_pred2 = c_model2_predict(val12.X)
            rca_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
            val2_rca.append(rca_score)
            val2_prot_preds.append(_preds)
            val2_prot_y.append(v2.y)
        except ValueError:
            pass

    val_targets_acc = np.array(
        [
            metrics.accuracy_score(v2_y, v2_preds)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
    val_targets_f1 = np.array(
        [
            metrics.f1_score(v2_y, v2_preds, average=f1_average)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
@@ -331,17 +429,28 @@ def rca_star(
            test_pred = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            val12_pred2 = c_model2_predict(val12.X)
            rca_star_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
            acc_score = reg_acc.predict(np.array([[rca_star_score]]))[0]
            f1_score = reg_f1.predict(np.array([[rca_score]]))[0]
            meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_pred))
            meta_f1 = abs(
                f1_score - metrics.f1_score(test.y, test_pred, average=f1_average)
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
                test.prevalence(),
                acc=meta_acc,
                acc_score=acc_score,
                f1=meta_f1,
                f1_score=f1_score,
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
                test.prevalence(),
                acc=np.nan,
                acc_score=np.nan,
                f1=np.nan,
                f1_score=np.nan,
            )

    return report
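What this commit changes in rca/rca_star: instead of comparing the raw RCA score directly against test accuracy, it first fits a linear regression from RCA scores to true accuracies over validation samples drawn under an APP protocol, and then maps the test sample's RCA score through that regressor. A toy sketch of the calibration step (all numbers invented):

import numpy as np
from sklearn.linear_model import LinearRegression

val_rca = np.array([0.95, 0.90, 0.80, 0.70])  # RCA scores on validation samples
val_acc = np.array([0.93, 0.88, 0.79, 0.68])  # true accuracies of those samples

reg = LinearRegression().fit(val_rca[:, np.newaxis], val_acc)
print(reg.predict(np.array([[0.85]]))[0])     # accuracy estimate for a test sample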
@@ -447,3 +556,4 @@ def kdex2(
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report
@@ -57,6 +57,8 @@ def estimate_worker(_estimate, train, validation, test, q=None):
def split_tasks(estimators, train, validation, test, q):
    _par, _seq = [], []
    for estim in estimators:
        if hasattr(estim, "nocall"):
            continue
        _task = [estim, train, validation, test]
        match estim.name:
            case n if n.endswith("_gs"):
@@ -2,7 +2,7 @@ from typing import List

import numpy as np

from quacc.evaluation import baseline, method
from quacc.evaluation import baseline, method, alt


class CompEstimatorFunc_:
@@ -40,7 +40,7 @@ class CompEstimatorName_:

class CompEstimator:
    def __get(cls, e: str | List[str], get_ref=True):
        _dict = method._methods | baseline._baselines
        _dict = alt._alts | method._methods | baseline._baselines

        match e:
            case "__all":
@@ -26,7 +26,12 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__C": np.logspace(-3, 3, 7),
                "q__classifier__class_weight": [None, "balanced"],
                "q__recalib": [None, "bcts"],
                "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
                "confidence": [
                    None,
                    ["isoft"],
                    ["max_conf", "entropy"],
                    ["max_conf", "entropy", "isoft"],
                ],
            }
        case "sld_rbf":
            _scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@@ -35,7 +40,12 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__class_weight": [None, "balanced"],
                "q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
                "q__recalib": [None, "bcts"],
                "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
                "confidence": [
                    None,
                    ["isoft"],
                    ["max_conf", "entropy"],
                    ["max_conf", "entropy", "isoft"],
                ],
            }
        case "pacc":
            return {
@@ -48,7 +58,7 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__C": np.logspace(-3, 3, 7),
                "q__classifier__class_weight": [None, "balanced"],
                "q__bandwidth": np.linspace(0.01, 0.2, 20),
                "confidence": [None, ["isoft"]],
                "confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
            }
        case "kde_rbf":
            _scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@@ -57,7 +67,7 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__class_weight": [None, "balanced"],
                "q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
                "q__bandwidth": np.linspace(0.01, 0.2, 20),
                "confidence": [None, ["isoft"]],
                "confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
            }
@@ -96,6 +106,15 @@ def evaluation_report(
    return report


@dataclass(frozen=True)
class EmptyMethod:
    name: str
    nocall: bool = True

    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
        pass


@dataclass(frozen=True)
class EvaluationMethod:
    name: str
@@ -162,13 +181,16 @@ class EvaluationMethodGridSearch(EvaluationMethod):
            verbose=False,
            **_search_params,
        ).fit(v_train)
        return evaluation_report(
        er = evaluation_report(
            estimator=est,
            protocol=protocol,
            method_name=self.name,
        )
        er.fit_score = est.best_score()
        return er


E = EmptyMethod
M = EvaluationMethod
G = EvaluationMethodGridSearch
@@ -229,12 +251,19 @@ __sld_lr_set = [
    M("mul_sld_lr_is", __sld_lr(), "mul", conf="isoft", ),
    M("m3w_sld_lr_is", __sld_lr(), "mul", conf="isoft", cf=True),
    M("mgf_sld_lr_is", __sld_lr(), "mul", conf="isoft", gf=True),
    # sld all
    M("bin_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
    M("bgf_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], gf=True),
    M("mul_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
    M("m3w_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
    M("mgf_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("bin_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr" ),
    G("bgf_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr", gf=True),
    G("mul_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr" ),
    G("m3w_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", cf=True),
    G("mgf_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", gf=True),
    E("sld_lr_gs"),
]

__dense_sld_lr_set = [
@@ -267,12 +296,18 @@ __dense_sld_lr_set = [
    M("d_mul_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", ),
    M("d_m3w_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", cf=True),
    M("d_mgf_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", gf=True),
    # sld all
    M("d_bin_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_bgf_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    M("d_mul_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    M("d_mgf_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
    G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
    G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
    G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
    G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
    G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
    G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
    G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
    G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
    G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
]

__dense_sld_rbf_set = [
@@ -305,6 +340,12 @@ __dense_sld_rbf_set = [
    M("d_mul_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", ),
    M("d_m3w_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", cf=True),
    M("d_mgf_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", gf=True),
    # sld all
    M("d_bin_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_bgf_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    M("d_mul_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    M("d_mgf_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", ),
    G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", gf=True),
@@ -334,10 +375,15 @@ __kde_lr_set = [
    M("bin_kde_lr_is", __kde_lr(), "bin", conf="isoft", ),
    M("mul_kde_lr_is", __kde_lr(), "mul", conf="isoft", ),
    M("m3w_kde_lr_is", __kde_lr(), "mul", conf="isoft", cf=True),
    # kde all
    M("bin_kde_lr_a", __kde_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
    M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
    M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
    E("kde_lr_gs"),
]

__dense_kde_lr_set = [
@@ -361,6 +407,10 @@ __dense_kde_lr_set = [
    M("d_bin_kde_lr_is", __kde_lr(), "bin", d=True, conf="isoft", ),
    M("d_mul_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", ),
    M("d_m3w_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", cf=True),
    # kde all
    M("d_bin_kde_lr_a", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_mul_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="spider" ),
    G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider" ),
@@ -388,6 +438,10 @@ __dense_kde_rbf_set = [
    M("d_bin_kde_rbf_is", __kde_rbf(), "bin", d=True, conf="isoft", ),
    M("d_mul_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", ),
    M("d_m3w_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", cf=True),
    # kde all
    M("d_bin_kde_rbf_a", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_mul_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("d_bin_kde_rbf_gs", __kde_rbf(), "bin", d=True, pg="kde_rbf", search="spider" ),
    G("d_mul_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider" ),
@@ -1,7 +1,6 @@
import json
import pickle
from collections import defaultdict
from itertools import chain
from pathlib import Path
from typing import List, Tuple

@@ -39,6 +38,7 @@ class EvaluationReport:
        self.data: pd.DataFrame | None = None
        self.name = name if name is not None else "default"
        self.time = 0.0
        self.fit_score = None

    def append_row(self, basep: np.ndarray | Tuple, **row):
        # bp = basep[1]
@@ -89,6 +89,7 @@ class CompReport:
        train_prev: np.ndarray = None,
        valid_prev: np.ndarray = None,
        times=None,
        fit_scores=None,
        g_time=None,
    ):
        if isinstance(datas, pd.DataFrame):
@@ -105,6 +106,13 @@ class CompReport:
            .sort_index(axis=0, level=0, ascending=False, sort_remaining=False)
        )

        if fit_scores is None:
            self.fit_scores = {
                er.name: er.fit_score for er in datas if er.fit_score is not None
            }
        else:
            self.fit_scores = fit_scores

        if times is None:
            self.times = {er.name: er.time for er in datas}
        else:
@@ -114,6 +122,51 @@ class CompReport:
        self.train_prev = train_prev
        self.valid_prev = valid_prev

    def postprocess(
        self,
        f_data: pd.DataFrame,
        _data: pd.DataFrame,
        metric=None,
        estimators=None,
    ) -> pd.DataFrame:
        _mapping = {
            "sld_lr_gs": [
                "bin_sld_lr_gs",
                "mul_sld_lr_gs",
                "m3w_sld_lr_gs",
            ],
            "kde_lr_gs": [
                "bin_kde_lr_gs",
                "mul_kde_lr_gs",
                "m3w_kde_lr_gs",
            ],
        }

        for name, methods in _mapping.items():
            if estimators is not None and name not in estimators:
                continue

            if len(np.where(np.in1d(methods, self._data.columns.unique(1)))[0]) != len(
                methods
            ):
                continue

            _metric = _get_metric(metric)
            m_data = _data.loc[:, (_metric, methods)]
            _fit_scores = [(k, v) for (k, v) in self.fit_scores.items() if k in methods]
            _best_method = [k for k, v in _fit_scores][
                np.argmin([v for k, v in _fit_scores])
            ]
            _metric = (
                [_metric]
                if _metric is isinstance(_metric, str)
                else m_data.columns.unique(0)
            )
            for _m in _metric:
                f_data.loc[:, (_m, name)] = m_data.loc[:, (_m, _best_method)]

        return f_data

    @property
    def prevs(self) -> np.ndarray:
        return self.data().index.unique(0)
@@ -149,6 +202,7 @@ class CompReport:
            train_prev=self.train_prev,
            valid_prev=self.valid_prev,
            times=self.times | other.times,
            fit_scores=self.fit_scores | other.fit_scores,
            g_time=self.times["tot"] + other.times["tot"],
        )

@@ -159,7 +213,10 @@ class CompReport:
        _estimators = _get_estimators(
            estimators, self._data.loc[:, (_metric, slice(None))].columns.unique(1)
        )
        f_data: pd.DataFrame = self._data.copy().loc[:, (_metric, _estimators)]
        _data: pd.DataFrame = self._data.copy()
        f_data: pd.DataFrame = _data.loc[:, (_metric, _estimators)]

        f_data = self.postprocess(f_data, _data, metric=metric, estimators=estimators)

        if len(f_data.columns.unique(0)) == 1:
            f_data = f_data.droplevel(level=0, axis=1)
@@ -187,7 +244,11 @@ class CompReport:
        _estimators = _get_estimators(
            estimators, shift_data.loc[:, (_metric, slice(None))].columns.unique(1)
        )
        s_data: pd.DataFrame = shift_data
        shift_data: pd.DataFrame = shift_data.loc[:, (_metric, _estimators)]
        shift_data = self.postprocess(
            shift_data, s_data, metric=metric, estimators=estimators
        )

        if len(shift_data.columns.unique(0)) == 1:
            shift_data = shift_data.droplevel(level=0, axis=1)
@@ -354,17 +415,27 @@ class CompReport:
        return res


def _cr_train_prev(cr: CompReport):
    return tuple(np.around(cr.train_prev, decimals=2))


def _cr_data(cr: CompReport, metric=None, estimators=None):
    return cr.data(metric, estimators)


class DatasetReport:
    _default_dr_modes = [
        "delta_train",
        "stdev_train",
        "train_table",
        "train_std_table",
        "shift",
        "shift_table",
        "delta_test",
        "stdev_test",
        "test_table",
        "stats_table",
        "fit_scores",
    ]
    _default_cr_modes = CompReport._default_modes
@@ -380,15 +451,62 @@ class DatasetReport:

        return DatasetReport(self.name, _crs)

    def fit_scores(self, metric: str = None, estimators: List[str] = None):
        def _get_sort_idx(arr):
            return np.array([np.searchsorted(np.sort(a), a) + 1 for a in arr])

        def _get_best_idx(arr):
            return np.argmin(arr, axis=1)

        def _fdata_idx(idx) -> np.ndarray:
            return _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()

        _crs_train = [_cr_train_prev(cr) for cr in self.crs]

        for cr in self.crs:
            if not hasattr(cr, "fit_scores"):
                return None

        _crs_fit_scores = [cr.fit_scores for cr in self.crs]

        _fit_scores = pd.DataFrame(_crs_fit_scores, index=_crs_train)
        _fit_scores = _fit_scores.sort_index(axis=0, ascending=False)

        _estimators = _get_estimators(estimators, _fit_scores.columns)
        if _estimators.shape[0] == 0:
            return None

        _fdata = self.data(metric=metric, estimators=_estimators)

        # ensure that columns in _fit_scores have the same ordering of _fdata
        _fit_scores = _fit_scores.loc[:, _fdata.columns]

        _best_fit_estimators = _get_best_idx(_fit_scores.to_numpy())

        # scores = np.array(
        #     [
        #         _get_sort_idx(
        #             _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()
        #         )[:, cl].mean()
        #         for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
        #     ]
        # )
        # for idx, cl in zip(_fit_scores.index, _best_fit_estimators):
        #     print(_fdata_idx(idx)[:, cl])
        #     print(_fdata_idx(idx).min(axis=1), end="\n\n")

        scores = np.array(
            [
                np.abs(_fdata_idx(idx)[:, cl] - _fdata_idx(idx).min(axis=1)).mean()
                for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
            ]
        )

        return scores

    def data(self, metric: str = None, estimators: List[str] = None) -> pd.DataFrame:
        def _cr_train_prev(cr: CompReport):
            return tuple(np.around(cr.train_prev, decimals=2))

        def _cr_data(cr: CompReport):
            return cr.data(metric, estimators)

        _crs_sorted = sorted(
            [(_cr_train_prev(cr), _cr_data(cr)) for cr in self.crs],
            [(_cr_train_prev(cr), _cr_data(cr, metric, estimators)) for cr in self.crs],
            key=lambda cr: len(cr[1].columns),
            reverse=True,
        )
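The scores returned by fit_scores above are, per train prevalence, the mean gap between the error of the method selected via its fit score and the per-row best error. A toy sketch (values invented):

import numpy as np

errors = np.array([[0.10, 0.05, 0.20],
                   [0.08, 0.06, 0.15]])  # rows: samples, cols: methods
best_by_fit = 0                          # column picked by the lowest fit score
gap = np.abs(errors[:, best_by_fit] - errors.min(axis=1)).mean()
print(gap)                               # -> 0.035, the picked method's mean regret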
@@ -460,6 +578,15 @@ class DatasetReport:
        avg_p.loc["mean", :] = f_data.mean()
        return avg_p

    def train_std_table(self, metric: str = None, estimators: List[str] = None):
        f_data = self.data(metric=metric, estimators=estimators)
        avg_p = f_data.groupby(level=1, sort=False).mean()
        avg_p.loc["mean", :] = f_data.mean()
        avg_s = f_data.groupby(level=1, sort=False).std()
        avg_s.loc["mean", :] = f_data.std()
        avg_r = pd.concat([avg_p, avg_s], axis=1, keys=["avg", "std"])
        return avg_r

    def test_table(
        self, metric: str = None, estimators: List[str] = None
    ) -> pd.DataFrame:
@@ -591,6 +718,20 @@ class DatasetReport:
                base_path=base_path,
                backend=backend,
            )
        elif mode == "fit_scores":
            _fit_scores = self.fit_scores(metric, estimators) if data is None else data
            if _fit_scores is None:
                return None
            train_prevs = self.data(metric, estimators).index.unique(0)
            return plot.plot_fit_scores(
                train_prevs=train_prevs,
                scores=_fit_scores,
                metric=metric,
                name=conf,
                save_fig=save_fig,
                base_path=base_path,
                backend=backend,
            )

    def to_md(
        self,
@@ -42,7 +42,7 @@ class BaseAccuracyEstimator(BaseQuantifier):
        pred_proba = self.classifier.predict_proba(coll.X)

        return ExtendedCollection.from_lc(
            coll, pred_proba=pred_proba, extpol=self.extpol
            coll, pred_proba=pred_proba, ext=pred_proba, extpol=self.extpol
        )

    def _extend_instances(self, instances: np.ndarray | sp.csr_matrix):
@@ -63,6 +63,13 @@ class Threshold(ConfidenceMetric):
        _exp = scores - self.threshold
        return _exp

    # def conf(self, X, probas):
    #     scores = self.get_scores(probas)
    #     _exp = np.where(
    #         scores >= self.threshold, np.ones(scores.shape), np.zeros(scores.shape)
    #     )
    #     return _exp[:, np.newaxis]


@metric("linreg")
class LinReg(ConfidenceMetric):
@@ -242,6 +242,11 @@ class GridSearchAE(BaseAccuracyEstimator):
            return self.best_model_
        raise ValueError("best_model called before fit")

    def best_score(self):
        if hasattr(self, "best_score_"):
            return self.best_score_
        raise ValueError("best_score called before fit")


class RandomizedSearchAE(GridSearchAE):
    ERR_THRESHOLD = 1e-4
@@ -473,3 +478,4 @@ class SpiderSearchAE(GridSearchAE):
                    score += 1

        return score
@@ -1 +1,7 @@
from quacc.plot.plot import get_backend, plot_delta, plot_diagonal, plot_shift
from quacc.plot.plot import (
    get_backend,
    plot_delta,
    plot_diagonal,
    plot_shift,
    plot_fit_scores,
)
@@ -52,3 +52,16 @@ class BasePlot:
        legend=True,
    ):
        ...

    @classmethod
    def plot_fit_scores(
        train_prevs,
        scores,
        *,
        pos_class=1,
        title="default",
        x_label="prev.",
        y_label="position",
        legend=True,
    ):
        ...
@@ -142,3 +142,37 @@ def plot_shift(
        return fig, output_path

    return fig


def plot_fit_scores(
    train_prevs,
    scores,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    legend=True,
    save_fig=False,
    base_path=None,
    backend=None,
):
    backend = __backend if backend is None else backend
    title = f"fit_scores_{name}_avg_{metric}"

    x_label = "train prev."
    y_label = "position"
    fig = backend.plot_fit_scores(
        train_prevs,
        scores,
        pos_class=pos_class,
        title=title,
        x_label=x_label,
        y_label=y_label,
        legend=legend,
    )

    if save_fig:
        output_path = backend.save_fig(fig, base_path, title)
        return fig, output_path

    return fig
@@ -8,10 +8,38 @@ import plotly.graph_objects as go
from quacc.plot.base import BasePlot


class PlotCfg:
    def __init__(self, mode, lwidth, font=None, legend=None, template="seaborn"):
        self.mode = mode
        self.lwidth = lwidth
        self.legend = {} if legend is None else legend
        self.font = {} if font is None else font
        self.template = template


web_cfg = PlotCfg("lines+markers", 2)
png_cfg = PlotCfg(
    "lines",
    5,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        xanchor="right",
        y=1.02,
        x=1,
        font=dict(size=24),
    ),
    font=dict(size=24),
    # template="ggplot2",
)

_cfg = png_cfg


class PlotlyPlot(BasePlot):
    __themes = defaultdict(
        lambda: {
            "template": "seaborn",
            "template": _cfg.template,
        }
    )
    __themes = __themes | {
@@ -35,7 +63,7 @@ class PlotlyPlot(BasePlot):
            case v if v > 10:
                __colors = plotly.colors.qualitative.Light24
            case _:
                __colors = plotly.colors.qualitative.Plotly
                __colors = plotly.colors.qualitative.G10

        def __generator(cs):
            while True:
@@ -50,9 +78,8 @@ class PlotlyPlot(BasePlot):
            xaxis_title=x_label,
            yaxis_title=y_label,
            template=self.theme["template"],
            font=dict(
                size=18,
            ),
            font=_cfg.font,
            legend=_cfg.legend,
        )

    def save_fig(self, fig, base_path, title) -> Path:
@@ -82,9 +109,9 @@ class PlotlyPlot(BasePlot):
                 go.Scatter(
                     x=x,
                     y=delta,
-                    mode="lines+markers",
+                    mode=_cfg.mode,
                     name=name,
-                    line=dict(color=self.hex_to_rgb(color)),
+                    line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
                     hovertemplate="prev.: %{x}<br>error: %{y:,.4f}",
                 )
             ]
@@ -193,9 +220,9 @@ class PlotlyPlot(BasePlot):
                     x=x,
                     y=delta,
                     customdata=np.stack((counts[col_idx],), axis=-1),
-                    mode="lines+markers",
+                    mode=_cfg.mode,
                     name=name,
-                    line=dict(color=self.hex_to_rgb(color)),
+                    line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
                     hovertemplate="shift: %{x}<br>error: %{y}"
                     + "<br>count: %{customdata[0]}"
                     if counts is not None
@@ -205,3 +232,29 @@ class PlotlyPlot(BasePlot):

         self.update_layout(fig, title, x_label, y_label)
         return fig
+
+    def plot_fit_scores(
+        self,
+        train_prevs,
+        scores,
+        *,
+        pos_class=1,
+        title="default",
+        x_label="prev.",
+        y_label="position",
+        legend=True,
+    ) -> go.Figure:
+        fig = go.Figure()
+        # x = train_prevs
+        x = [str(tuple(bp)) for bp in train_prevs]
+        fig.add_trace(
+            go.Scatter(
+                x=x,
+                y=scores,
+                mode="lines+markers",
+                showlegend=False,
+            ),
+        )
+
+        self.update_layout(fig, title, x_label, y_label)
+        return fig
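
A hedged sketch of calling the new backend method directly; PlotlyPlot's constructor arguments (e.g. theme selection) are not visible in this hunk, so default construction is assumed:

plot = PlotlyPlot()                        # assumed default construction
fig = plot.plot_fit_scores(
    [(0.1, 0.9), (0.5, 0.5), (0.9, 0.1)],  # rendered as categorical str(tuple(...)) x labels
    [2, 1, 3],                             # one score per training prevalence
    title="fit_scores_imdb_avg_acc",
    x_label="train prev.",
    y_label="position",
)
fig.show()                                 # or persist via the backend's save_fig()
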
remote.log (6775 lines): file diff suppressed because it is too large.
@@ -0,0 +1,116 @@
+abstention==0.1.3.1 ; python_version >= "3.10" and python_version < "4.0"
+ansi2html==1.9.1 ; python_version >= "3.10" and python_version < "4.0"
+appnope==0.1.3 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Darwin"
+asttokens==2.4.1 ; python_version >= "3.10" and python_version < "4.0"
+bcrypt==4.1.2 ; python_version >= "3.10" and python_version < "4.0"
+bleach==6.1.0 ; python_version >= "3.10" and python_version < "4.0"
+blinker==1.7.0 ; python_version >= "3.10" and python_version < "4.0"
+bokeh==3.3.4 ; python_version >= "3.10" and python_version < "4.0"
+certifi==2023.11.17 ; python_version >= "3.10" and python_version < "4.0"
+cffi==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
+charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0"
+click==8.1.7 ; python_version >= "3.10" and python_version < "4.0"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and sys_platform == "win32" or python_version >= "3.10" and python_version < "4" and platform_system == "Windows"
+comm==0.2.1 ; python_version >= "3.10" and python_version < "4.0"
+contourpy==1.2.0 ; python_version >= "3.10" and python_version < "4"
+coverage[toml]==7.4.1 ; python_version >= "3.10" and python_version < "4.0"
+cryptography==42.0.1 ; python_version >= "3.10" and python_version < "4.0"
+cycler==0.12.1 ; python_version >= "3.10" and python_version < "4"
+dash-bootstrap-components==1.5.0 ; python_version >= "3.10" and python_version < "4"
+dash-core-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash-html-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash-table==5.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash==2.14.2 ; python_version >= "3.10" and python_version < "4.0"
+debugpy==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
+decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"
+exceptiongroup==1.2.0 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.0.1 ; python_version >= "3.10" and python_version < "4.0"
+flask==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+fonttools==4.47.2 ; python_version >= "3.10" and python_version < "4"
+gunicorn==21.2.0 ; python_version >= "3.10" and python_version < "4.0"
+idna==3.6 ; python_version >= "3.10" and python_version < "4.0"
+importlib-metadata==7.0.1 ; python_version >= "3.10" and python_version < "4.0"
+iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "4.0"
+ipympl==0.9.3 ; python_version >= "3.10" and python_version < "4.0"
+ipython-genutils==0.2.0 ; python_version >= "3.10" and python_version < "4.0"
+ipython==8.20.0 ; python_version >= "3.10" and python_version < "4.0"
+ipywidgets-bokeh==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
+ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "4.0"
+itsdangerous==2.1.2 ; python_version >= "3.10" and python_version < "4.0"
+jedi==0.19.1 ; python_version >= "3.10" and python_version < "4.0"
+jinja2==3.1.3 ; python_version >= "3.10" and python_version < "4.0"
+joblib==1.3.2 ; python_version >= "3.10" and python_version < "4"
+jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "4.0"
+jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "4.0"
+jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "4.0"
+kiwisolver==1.4.5 ; python_version >= "3.10" and python_version < "4"
+linkify-it-py==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
+logging==0.4.9.6 ; python_version >= "3.10" and python_version < "4.0"
+markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
+markdown==3.5.2 ; python_version >= "3.10" and python_version < "4.0"
+markupsafe==2.1.4 ; python_version >= "3.10" and python_version < "4.0"
+matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "4.0"
+matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "4"
+mdit-py-plugins==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
+mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0"
+nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "4.0"
+numpy==1.26.3 ; python_version >= "3.10" and python_version < "4.0"
+packaging==23.2 ; python_version >= "3.10" and python_version < "4.0"
+pandas-stubs==2.1.4.231227 ; python_version >= "3.10" and python_version < "4.0"
+pandas==2.2.0 ; python_version >= "3.10" and python_version < "4.0"
+panel==1.3.8 ; python_version >= "3.10" and python_version < "4.0"
+param==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
+paramiko==3.4.0 ; python_version >= "3.10" and python_version < "4.0"
+parso==0.8.3 ; python_version >= "3.10" and python_version < "4.0"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
+pillow==10.2.0 ; python_version >= "3.10" and python_version < "4.0"
+platformdirs==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
+plotly==5.18.0 ; python_version >= "3.10" and python_version < "4.0"
+pluggy==1.4.0 ; python_version >= "3.10" and python_version < "4.0"
+prompt-toolkit==3.0.43 ; python_version >= "3.10" and python_version < "4.0"
+psutil==5.9.8 ; python_version >= "3.10" and python_version < "4.0"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
+pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "4.0"
+pyarrow==15.0.0 ; python_version >= "3.10" and python_version < "4.0"
+pycparser==2.21 ; python_version >= "3.10" and python_version < "4.0"
+pygments==2.17.2 ; python_version >= "3.10" and python_version < "4.0"
+pylance==0.5.10 ; python_version >= "3.10" and python_version < "4.0"
+pynacl==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
+pyparsing==3.1.1 ; python_version >= "3.10" and python_version < "4"
+pytest-cov==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
+pytest-mock==3.12.0 ; python_version >= "3.10" and python_version < "4.0"
+pytest==8.0.0 ; python_version >= "3.10" and python_version < "4.0"
+python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "4.0"
+pytz==2023.4 ; python_version >= "3.10" and python_version < "4.0"
+pyviz-comms==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "4.0"
+pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0"
+pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "4.0"
+quapy==0.1.7 ; python_version >= "3.10" and python_version < "4"
+requests==2.31.0 ; python_version >= "3.10" and python_version < "4.0"
+retrying==1.3.4 ; python_version >= "3.10" and python_version < "4.0"
+scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "4"
+scipy==1.12.0 ; python_version >= "3.10" and python_version < "4.0"
+setuptools==69.0.3 ; python_version >= "3.10" and python_version < "4.0"
+six==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0"
+tabulate==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
+tenacity==8.2.3 ; python_version >= "3.10" and python_version < "4.0"
+threadpoolctl==3.2.0 ; python_version >= "3.10" and python_version < "4"
+tomli==2.0.1 ; python_version >= "3.10" and python_full_version <= "3.11.0a6"
+tornado==6.4 ; python_version >= "3.10" and python_version < "4.0"
+tqdm==4.66.1 ; python_version >= "3.10" and python_version < "4"
+traitlets==5.14.1 ; python_version >= "3.10" and python_version < "4.0"
+types-pytz==2023.4.0.20240130 ; python_version >= "3.10" and python_version < "4.0"
+typing-extensions==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
+tzdata==2023.4 ; python_version >= "3.10" and python_version < "4.0"
+uc-micro-py==1.0.2 ; python_version >= "3.10" and python_version < "4.0"
+urllib3==2.1.0 ; python_version >= "3.10" and python_version < "4.0"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4.0"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version < "4.0"
+werkzeug==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+widgetsnbextension==4.0.9 ; python_version >= "3.10" and python_version < "4.0"
+xlrd==2.0.1 ; python_version >= "3.10" and python_version < "4"
+xyzservices==2023.10.1 ; python_version >= "3.10" and python_version < "4.0"
+zipp==3.17.0 ; python_version >= "3.10" and python_version < "4.0"
@@ -0,0 +1,9 @@
+from quacc.evaluation.report import DatasetReport
+
+dr = DatasetReport.unpickle("output/main/imdb/imdb.pickle")
+_estimators = ["sld_lr_gs", "bin_sld_lr_gs", "mul_sld_lr_gs", "m3w_sld_lr_gs"]
+_data = dr.data(metric="acc", estimators=_estimators)
+for idx, cr in zip(_data.index.unique(0), dr.crs[::-1]):
+    print(cr.train_prev)
+    print({k: v for k, v in cr.fit_scores.items() if k in _estimators})
+    print(_data.loc[(idx, slice(None), slice(None)), :])
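
A hedged follow-up: the fit_scores printed by the script above are the natural input to the new plot_fit_scores wrapper. The aggregation below (one scalar score per training prevalence, per estimator) is an assumption about the structure of cr.fit_scores, not code from this commit:

from quacc.plot import plot_fit_scores

train_prevs = [cr.train_prev for cr in dr.crs[::-1]]
for est in _estimators:
    # assumes cr.fit_scores[est] is a single score per training prevalence
    scores = [cr.fit_scores[est] for cr in dr.crs[::-1]]
    plot_fit_scores(train_prevs, scores, metric="acc", name=f"imdb_{est}", save_fig=False)
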