update
This commit is contained in:
parent 6bf2fb9e1b
commit 2d8d4c3c68
@@ -0,0 +1,90 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from method.kdey import KDEyML, KDEyCS, KDEyHD
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy

datasets = qp.datasets.UCI_DATASETS

# target = 'f1'
target = 'acc'

errors = []

# dataset_name = datasets[-2]
for dataset_name in datasets:
    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
        continue
    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    train, val = train.split_stratified(random_state=0)

    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    for sample in APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')():
        print('='*80)
        y_hat = cls.predict(sample.instances)
        y = sample.labels
        if target == 'acc':
            acc = (y_hat==y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        q = EMQ(cls)
        q.fit(train, fit_classifier=False)

        # q = EMQ(cls)
        # q.fit(train, val_split=val, fit_classifier=False)
        M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        tp = cont_table_hat[1,1]
        tn = cont_table_hat[0,0]
        fn = cont_table_hat[0,1]
        fp = cont_table_hat[1,0]

        if target == 'acc':
            acc_hat = (tp+tn)
        else:
            den = (2*tp + fn + fp)
            if den > 0:
                acc_hat = 2*tp / den
            else:
                acc_hat = 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
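A side note on the estimate at the core of this script (an illustrative sketch with made-up numbers, not part of the commit): `cont_table_hat = p_hat * M_hat` broadcasts the estimated class prevalences over the misclassification-rate matrix, giving a joint contingency table whose cells sum to 1, from which accuracy is read off as tp + tn. The row/column convention below is inferred from the fn/fp indexing in the script.

import numpy as np

p_hat = np.asarray([0.3, 0.7])       # estimated test prevalences (neg, pos)
M_hat = np.asarray([[0.9, 0.2],      # P(pred=0 | true=0), P(pred=0 | true=1)
                    [0.1, 0.8]])     # P(pred=1 | true=0), P(pred=1 | true=1)

cont = p_hat * M_hat                 # cont[i, j] approximates P(pred=i, true=j)
tn, fn, fp, tp = cont[0, 0], cont[0, 1], cont[1, 0], cont[1, 1]
print(cont.sum())                    # -> 1.0
print(tp + tn)                       # estimated accuracy: 0.56 + 0.27 = 0.83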
@@ -0,0 +1,269 @@
import numpy as np
import scipy.special
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

import quapy as qp
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
from sklearn import clone
import quapy.functional as F

# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']

# target = 'f1'
target = 'acc'

errors = []

def method_1(cls, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # q = EMQ(LogisticRegression(class_weight='balanced'))
    # q.fit(val, fit_classifier=True)
    q = EMQ(cls)
    q.fit(train, fit_classifier=False)

    # q = KDEyML(cls)
    # q.fit(train, val_split=val, fit_classifier=False)
    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    p_hat = q.quantify(sample.instances)
    cont_table_hat = p_hat * M_hat
    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
    # cont_table_hat = cont_table_hat / cont_table_hat.sum()

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)
    print('cont_table Sum :\n', cont_table_hat.sum())

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp


def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Assume P and Q are the training and test distributions.
    Solves the following system of linear equations:
    tp + fp = CC (the classify & count estimate, observed)
    fn + tp = Q(Y=1) (this is not observed but is estimated via quantification)
    tp + fp + fn + tn = 1 (trivial)

    There are 4 unknowns and 3 equations. The fourth required one is established
    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we consider that the tpr in P (estimated via validation, hereafter tpr)
    and in Q (unknown, hereafter tpr_Q) should be the same. This means:
    tpr = tpr_Q = tp / (tp + fn)
    after some manipulation:
    tp (tpr-1) + fn (tpr) = 0 <-- our last equation

    Note that the last equation relies on the estimate tpr. It is likely that, the more
    positives we have, the more reliable this estimate is. This suggests that, in cases
    in which we have more negatives in the validation set than positives, it might be
    convenient to resort to the true negative rate (tnr) instead. This gives rise to
    the alternative fourth equation:
    tn (tnr-1) + fp (tnr) = 0

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]
    # pos_prev = sample.prevalence()[1]

    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    cont_table_true = sample.prevalence() * M_true

    if val.prevalence()[1] > 0.5:

        # in this case, the tpr might be a more reliable estimate than tnr
        tpr_hat = M_hat[1, 1]

        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])

    else:

        # in this case, the tnr might be a more reliable estimate than tpr
        tnr_hat = M_hat[0, 0]

        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [cc_prev, pos_prev, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    # if (cont_table_estim < 0).any() or (cont_table_estim > 1).any():
    #     cont_table_estim = scipy.special.softmax(cont_table_estim)

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)
    # print('true_tpr', M_true[1,1])
    # print('estim_tpr', tpr_hat)

    return tn, fn, fp, tp


def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method 2, but without involving any of quapy's quantifiers.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
    tpr_hat = M_hat[1,1]
    fpr_hat = M_hat[1,0]
    tnr_hat = M_hat[0,0]
    pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
    pos_prev_val = val.prevalence()[1]

    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat-1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [pos_prev_cc, pos_prev_test_hat, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp


def cls_eval_from_counters(tn, fn, fp, tp):
    if target == 'acc':
        acc_hat = (tp + tn)
    else:
        den = (2 * tp + fn + fp)
        if den > 0:
            acc_hat = 2 * tp / den
        else:
            acc_hat = 0
    return acc_hat


def cls_eval_from_labels(y, y_hat):
    if target == 'acc':
        acc = (y_hat == y).mean()
    else:
        acc = f1_score(y, y_hat, zero_division=0)
    return acc


for dataset_name in datasets:

    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
    for train in train_prot():
        if np.product(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced')

        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        for sample in APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')():
            print('='*80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels
            acc_true = cls_eval_from_labels(y, y_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)

            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
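To make the system described in method_2's docstring concrete, here is a worked instance with invented numbers (the tpr, the classify-and-count estimate, and the quantified positive prevalence are all assumed); the unknowns are ordered (tn, fn, fp, tp) exactly as in the code:

import numpy as np

tpr_hat = 0.8     # assumed tpr estimated on validation
cc_prev = 0.50    # classify & count estimate: fp + tp
pos_prev = 0.55   # quantified Q(Y=1): fn + tp

A = np.asarray([
    [0, 0, 1, 1],                  # fp + tp = cc_prev
    [0, 1, 0, 1],                  # fn + tp = pos_prev
    [1, 1, 1, 1],                  # all four cells sum to 1
    [0, tpr_hat, 0, tpr_hat - 1],  # fn*tpr + tp*(tpr - 1) = 0, i.e. tp/(tp+fn) = tpr
])
b = np.asarray([cc_prev, pos_prev, 1, 0])

tn, fn, fp, tp = np.linalg.solve(A, b)
print(tn, fn, fp, tp)  # -> 0.39 0.11 0.06 0.44, and indeed 0.44/(0.44+0.11) = 0.8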
conf.yaml
@@ -5,47 +5,80 @@ debug_conf: &debug_conf
  OUT_DIR_NAME: output/debug
  DATASET_N_PREVS: 9
  COMP_ESTIMATORS:
    - bin_sld_lr
    - mul_sld_lr
    - m3w_sld_lr
    - d_bin_sld_lr
    - d_mul_sld_lr
    - d_m3w_sld_lr
    - d_bin_sld_rbf
    - d_mul_sld_rbf
    - d_m3w_sld_rbf
    - bin_kde_lr
    - mul_kde_lr
    - m3w_kde_lr
    - d_bin_kde_lr
    - d_mul_kde_lr
    - d_m3w_kde_lr
    - d_bin_kde_rbf
    - d_mul_kde_rbf
    - d_m3w_kde_rbf
    # - bin_sld_lr
    # - mul_sld_lr
    # - m3w_sld_lr
    # - d_bin_sld_lr
    # - d_mul_sld_lr
    # - d_m3w_sld_lr
    # - d_bin_sld_rbf
    # - d_mul_sld_rbf
    # - d_m3w_sld_rbf
    # - bin_kde_lr
    # - mul_kde_lr
    # - m3w_kde_lr
    # - d_bin_kde_lr
    # - d_mul_kde_lr
    # - d_m3w_kde_lr
    # - d_bin_kde_rbf
    # - d_mul_kde_rbf
    # - d_m3w_kde_rbf
    # - mandoline
    # - rca
    - bin_sld_lr_is
    - mul_sld_lr_is
    - m3w_sld_lr_is
    - rca
    - rca_star
    - doc
    - atc_mc
  N_JOBS: -2

  confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: twitter_gasp
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

test_conf: &test_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/test
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - cross
      - cross2
      - bin_sld_lr
      - mul_sld_lr
      - m3w_sld_lr
      - bin_sld_lr_is
      - mul_sld_lr_is
      - m3w_sld_lr_is
      - doc
      - atc_mc
    N_JOBS: -2

  confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: twitter_gasp

main:
  confs: &main_confs
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT
      DATASET_TARGET: CCAT
  other_confs:
    - DATASET_NAME: imdb
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT

sld_lr_conf: &sld_lr_conf
@@ -72,6 +105,9 @@ sld_lr_conf: &sld_lr_conf
      - bin_sld_lr_is
      - mul_sld_lr_is
      - m3w_sld_lr_is
      - bin_sld_lr_a
      - mul_sld_lr_a
      - m3w_sld_lr_a
      - bin_sld_lr_gs
      - mul_sld_lr_gs
      - m3w_sld_lr_gs
@@ -116,6 +152,9 @@ d_sld_lr_conf: &d_sld_lr_conf
      - d_bin_sld_lr_is
      - d_mul_sld_lr_is
      - d_m3w_sld_lr_is
      - d_bin_sld_lr_a
      - d_mul_sld_lr_a
      - d_m3w_sld_lr_a
      - d_bin_sld_lr_gs
      - d_mul_sld_lr_gs
      - d_m3w_sld_lr_gs
@@ -160,6 +199,9 @@ d_sld_rbf_conf: &d_sld_rbf_conf
      - d_bin_sld_rbf_is
      - d_mul_sld_rbf_is
      - d_m3w_sld_rbf_is
      - d_bin_sld_rbf_a
      - d_mul_sld_rbf_a
      - d_m3w_sld_rbf_a
      - d_bin_sld_rbf_gs
      - d_mul_sld_rbf_gs
      - d_m3w_sld_rbf_gs
@@ -202,6 +244,9 @@ kde_lr_conf: &kde_lr_conf
      - bin_kde_lr_is
      - mul_kde_lr_is
      - m3w_kde_lr_is
      - bin_kde_lr_a
      - mul_kde_lr_a
      - m3w_kde_lr_a
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
@@ -238,6 +283,9 @@ d_kde_lr_conf: &d_kde_lr_conf
      - d_bin_kde_lr_is
      - d_mul_kde_lr_is
      - d_m3w_kde_lr_is
      - d_bin_kde_lr_a
      - d_mul_kde_lr_a
      - d_m3w_kde_lr_a
      - d_bin_kde_lr_gs
      - d_mul_kde_lr_gs
      - d_m3w_kde_lr_gs
@@ -274,6 +322,9 @@ d_kde_rbf_conf: &d_kde_rbf_conf
      - d_bin_kde_rbf_is
      - d_mul_kde_rbf_is
      - d_m3w_kde_rbf_is
      - d_bin_kde_rbf_a
      - d_mul_kde_rbf_a
      - d_m3w_kde_rbf_a
      - d_bin_kde_rbf_gs
      - d_mul_kde_rbf_gs
      - d_m3w_kde_rbf_gs
@@ -287,5 +338,72 @@ d_kde_rbf_conf: &d_kde_rbf_conf
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

baselines_conf: &baselines_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/baselines
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - doc
      - atc_mc
      - mandoline
      - rca
      - rca_star
    N_JOBS: -2

exec: *d_sld_rbf_conf
confs: *main_confs
other_confs:
  - DATASET_NAME: imdb
  - DATASET_NAME: rcv1
    DATASET_TARGET: CCAT

kde_lr_gs_conf: &kde_lr_gs_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/kde_lr_gs
    DATASET_N_PREVS: 9
    COMP_ESTIMATORS:
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
    N_JOBS: -2

  confs: *main_confs

timing_conf: &timing_conf
  global:
    METRICS:
      - acc
      - f1
    OUT_DIR_NAME: output/timing
    DATASET_N_PREVS: 1
    COMP_ESTIMATORS:
      - bin_sld_lr_a
      - mul_sld_lr_a
      - m3w_sld_lr_a
      - bin_kde_lr_a
      - mul_kde_lr_a
      - m3w_kde_lr_a
      - bin_sld_lr_gs
      - mul_sld_lr_gs
      - m3w_sld_lr_gs
      - bin_kde_lr_gs
      - mul_kde_lr_gs
      - m3w_kde_lr_gs
      - doc
      - atc_mc
      - rca
      - rca_star
      - mandoline
    N_JOBS: 1
    PROTOCOL_N_PREVS: 1,
    PROTOCOL_REPEATS: 1,
    SAMPLE_SIZE: 1000,

  confs: *main_confs

exec: *kde_lr_gs_conf
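The config above relies heavily on YAML anchors (&name) and aliases (*name), e.g. `exec: *kde_lr_gs_conf`. A minimal sketch of how they resolve, assuming the project loads the file with PyYAML (the loading code itself is illustrative, not taken from the repo):

import yaml

# An alias node (*kde_lr_gs_conf) is replaced at load time by the mapping
# defined under the anchor (&kde_lr_gs_conf), so conf["exec"] is a full dict.
with open("conf.yaml") as f:
    conf = yaml.safe_load(f)

print(conf["exec"]["global"]["OUT_DIR_NAME"])  # -> output/kde_lr_gs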
@@ -0,0 +1,11 @@
#!/bin/bash

CMD="cp"
DEST="~/tesi_docker/"

bash -c "${CMD} -r quacc ${DEST}"
bash -c "${CMD} -r baselines ${DEST}"
bash -c "${CMD} run.py ${DEST}"
bash -c "${CMD} remote.py ${DEST}"
bash -c "${CMD} conf.yaml ${DEST}"
bash -c "${CMD} requirements.txt ${DEST}"
@@ -0,0 +1,8 @@
#!/bin/bash

if [[ "${1}" == "r" ]]; then
    scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
    ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
else
    tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
fi
File diff suppressed because it is too large
@@ -13,6 +13,7 @@ jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"
abstention = "^0.1.3.1"
pytest = "^8.0.0"

[tool.poetry.scripts]
main = "quacc.main:main"
@@ -34,21 +35,20 @@ dash = "gunicorn qcdash.app:server -b ilona.isti.cnr.it:33421"
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 0 -f /home/volpi/tesi/quacc.log >> ~/tesi/remote.log
"""

[tool.poe.tasks.logrf]
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat --paging=never --language log
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
"""
[tool.poe.tasks.logf]
shell = """
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
"""

interpreter = "fish"
env = { HOST = "volpi@ilona.isti.cnr.it" }

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"
@@ -85,6 +85,8 @@ def get_table(dr: DatasetReport, metric, estimators, view, mode):
        case ("avg", "train_table"):
            # return dr.data(metric=metric, estimators=estimators).groupby(level=1).mean()
            return dr.train_table(metric=metric, estimators=estimators)
        case ("avg", "train_std_table"):
            return dr.train_std_table(metric=metric, estimators=estimators)
        case ("avg", "test_table"):
            # return dr.data(metric=metric, estimators=estimators).groupby(level=0).mean()
            return dr.test_table(metric=metric, estimators=estimators)
@@ -121,24 +123,44 @@ def get_DataTable(df, mode):

    _index_name = dict(
        train_table="test prev.",
        train_std_table="train prev.",
        test_table="train prev.",
        shift_table="shift",
        stats_table="method",
    )
    df = df.reset_index()

    if mode == "train_std_table":
        columns_format = Format()
        df_columns = np.concatenate([["index"], df.columns.unique(1)[1:]])
        data = [
            dict(
                index="(" + ", ".join([f"{v:.2f}" for v in idx]) + ")"
                if isinstance(idx, tuple | list | np.ndarray)
                else str(idx)
            )
            | {
                k: f"{df.loc[i,('avg',k)]:.4f}~{df.loc[i,('std',k)]:.3f}"
                for k in df.columns.unique(1)[1:]
            }
            for i, idx in zip(df.index, df.loc[:, ("index", "")])
        ]
    else:
        columns_format = Format(precision=6, scheme=Scheme.exponent, nully="nan")
        df_columns = df.columns
        data = df.to_dict("records")

    columns = {
        c: dict(
            id=c,
            name=_index_name[mode] if c == "index" else c,
            type="numeric",
            format=Format(precision=6, scheme=Scheme.exponent, nully="nan"),
            format=columns_format,
        )
        for c in df.columns
        for c in df_columns
    }
    # columns["index"]["format"] = Format(precision=2, scheme=Scheme.fixed)
    columns["index"]["format"] = Format()
    columns = list(columns.values())
    data = df.to_dict("records")
    for d in data:
        if isinstance(d["index"], tuple | list | np.ndarray):
            d["index"] = "(" + ", ".join([f"{v:.2f}" for v in d["index"]]) + ")"
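The train_std_table branch above renders each cell as an "avg~std" pair; a one-line sketch of that formatting (values invented):

avg, std = 0.1234567, 0.0456789
print(f"{avg:.4f}~{std:.3f}")  # -> 0.1235~0.046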
@@ -320,25 +320,59 @@ def rcv1_info():
    n_train = 23149

    targets = []
    for target in range(103):
        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
    for target in ["CCAT", "MCAT", "GCAT"]:
        target_index = np.where(dataset.target_names == target)[0]
        train_t_prev = np.average(
            dataset.target[:n_train, target_index].toarray().flatten()
        )
        test_t_prev = np.average(
            dataset.target[n_train:, target_index].toarray().flatten()
        )
        d = Dataset(name="rcv1", target=target)()[0]
        targets.append(
            (
                dataset.target_names[target],
                target,
                {
                    "train": (1.0 - train_t_prev, train_t_prev),
                    "test": (1.0 - test_t_prev, test_t_prev),
                    "train_size": len(d.train),
                    "val_size": len(d.validation),
                    "test_size": len(d.test),
                },
            )
        )

    targets.sort(key=lambda t: t[1]["train"][1])
    for n, d in targets:
        print(f"{n}:")
        for k, (fp, tp) in d.items():
            print(f"\t{k}: {fp:.4f}, {tp:.4f}")
        for k, v in d.items():
            if isinstance(v, tuple):
                print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
            else:
                print(f"\t{k}: {v}")


def imdb_info():
    train, test = qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test

    train_t_prev = train.prevalence()
    test_t_prev = test.prevalence()
    dst = Dataset(name="imdb")()[0]
    d = {
        "train": (train_t_prev[0], train_t_prev[1]),
        "test": (test_t_prev[0], test_t_prev[1]),
        "train_size": len(dst.train),
        "val_size": len(dst.validation),
        "test_size": len(dst.test),
    }

    print("imdb:")
    for k, v in d.items():
        if isinstance(v, tuple):
            print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
        else:
            print(f"\t{k}: {v}")


if __name__ == "__main__":
    fetch_cifar100()
    rcv1_info()
    imdb_info()
@@ -0,0 +1,115 @@
from functools import wraps

import numpy as np
import quapy.functional as F
import sklearn.metrics as metrics
from quapy.method.aggregative import ACC, EMQ
from sklearn import clone
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport

_alts = {}


def alt(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    wrapper.name = func.__name__
    _alts[func.__name__] = wrapper

    return wrapper


@alt
def cross(c_model, validation, protocol):
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)

    qcls = clone(c_model)
    qcls.fit(*validation.Xy)

    er = EvaluationReport(name="cross")
    for sample in protocol():
        y_hat = c_model.predict(sample.instances)
        y = sample.labels
        ground_acc = (y_hat == y).mean()
        ground_f1 = metrics.f1_score(y, y_hat, zero_division=0)

        q = EMQ(qcls)
        q.fit(validation, fit_classifier=False)

        M_hat = ACC.getPteCondEstim(validation.classes_, y_val, y_hat_val)
        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er


@alt
def cross2(c_model, validation, protocol):
    classes = validation.classes_
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    pos_prev_val = validation.prevalence()[1]

    er = EvaluationReport(name="cross2")
    for sample in protocol():
        y_test = sample.labels
        y_hat_test = c_model.predict(sample.instances)
        ground_acc = (y_hat_test == y_test).mean()
        ground_f1 = metrics.f1_score(y_test, y_hat_test, zero_division=0)
        pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
        tpr_hat = M_hat[1, 1]
        fpr_hat = M_hat[1, 0]
        tnr_hat = M_hat[0, 0]
        pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
        pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)

        if pos_prev_val > 0.5:
            # in this case, the tpr might be a more reliable estimate than tnr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [0, tpr_hat, 0, tpr_hat - 1]]
            )
        else:
            # in this case, the tnr might be a more reliable estimate than tpr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [tnr_hat - 1, 0, tnr_hat, 0]]
            )

        b = np.asarray([pos_prev_cc, pos_prev_test_hat, 1, 0])

        tn, fn, fp, tp = np.linalg.solve(A, b)
        cont_table_hat = np.array([[tn, fp], [fn, tp]])

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er
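The @alt decorator above implements a small registry pattern: each decorated function is stored in _alts under its own name, which is what lets CompEstimator later merge `alt._alts | method._methods | baseline._baselines` into a single lookup table. A self-contained sketch of the same idea (the names here are hypothetical, not repo code):

_registry = {}

def register(func):
    # store the function under its own name so callers can look it up later
    _registry[func.__name__] = func
    return func

@register
def cross_demo():
    return "ran"

print(_registry["cross_demo"]())  # -> ran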
@@ -288,21 +288,76 @@ def rca(
):
    """elsahar19"""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)
    f1_average = "binary" if validation.n_classes == 2 else "macro"
    val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED)
    val1_pred1 = c_model_predict(val1.X)

    val2_protocol = APP(
        val2,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    val2_prot_preds = []
    val2_rca = []
    val2_prot_preds = []
    val2_prot_y = []
    for v2 in val2_protocol():
        _preds = c_model_predict(v2.X)
        try:
            c_model2 = clone_fit(c_model, v2.X, _preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val1_pred2 = c_model2_predict(val1.X)
            rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
            val2_rca.append(rca_score)
            val2_prot_preds.append(_preds)
            val2_prot_y.append(v2.y)
        except ValueError:
            pass

    val_targets_acc = np.array(
        [
            metrics.accuracy_score(v2_y, v2_preds)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
    val_targets_f1 = np.array(
        [
            metrics.f1_score(v2_y, v2_preds, average=f1_average)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_pred)
            test_preds = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
            val1_pred2 = c_model2_predict(val1.X)
            rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
            acc_score = reg_acc.predict(np.array([[rca_score]]))[0]
            f1_score = reg_f1.predict(np.array([[rca_score]]))[0]
            meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
            meta_f1 = abs(
                f1_score - metrics.f1_score(test.y, test_preds, average=f1_average)
            )
            report.append_row(
                test.prevalence(),
                acc=meta_acc,
                acc_score=acc_score,
                f1=meta_f1,
                f1_score=f1_score,
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
                test.prevalence(),
                acc=np.nan,
                acc_score=np.nan,
                f1=np.nan,
                f1_score=np.nan,
            )

    return report
@@ -317,13 +372,56 @@ def rca_star(
):
    """elsahar19"""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
    f1_average = "binary" if validation.n_classes == 2 else "macro"
    validation1, val2 = validation.split_stratified(
        train_prop=0.5, random_state=env._R_SEED
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = clone_fit(c_model, validation1.X, val1_pred)
    val11, val12 = validation1.split_stratified(
        train_prop=0.5, random_state=env._R_SEED
    )

    val11_pred = c_model_predict(val11.X)
    c_model1 = clone_fit(c_model, val11.X, val11_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)
    val12_pred1 = c_model1_predict(val12.X)

    val2_protocol = APP(
        val2,
        n_prevalences=21,
        repeats=100,
        return_type="labelled_collection",
    )
    val2_prot_preds = []
    val2_rca = []
    val2_prot_preds = []
    val2_prot_y = []
    for v2 in val2_protocol():
        _preds = c_model_predict(v2.X)
        try:
            c_model2 = clone_fit(c_model, v2.X, _preds)
            c_model2_predict = getattr(c_model2, predict_method)
            val12_pred2 = c_model2_predict(val12.X)
            rca_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
            val2_rca.append(rca_score)
            val2_prot_preds.append(_preds)
            val2_prot_y.append(v2.y)
        except ValueError:
            pass

    val_targets_acc = np.array(
        [
            metrics.accuracy_score(v2_y, v2_preds)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
    val_targets_f1 = np.array(
        [
            metrics.f1_score(v2_y, v2_preds, average=f1_average)
            for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
        ]
    )
    reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
@@ -331,17 +429,28 @@ def rca_star(
            test_pred = c_model_predict(test.X)
            c_model2 = clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            val12_pred2 = c_model2_predict(val12.X)
            rca_star_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
            acc_score = reg_acc.predict(np.array([[rca_star_score]]))[0]
            f1_score = reg_f1.predict(np.array([[rca_score]]))[0]
            meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_pred))
            meta_f1 = abs(
                f1_score - metrics.f1_score(test.y, test_pred, average=f1_average)
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
                test.prevalence(),
                acc=meta_acc,
                acc_score=acc_score,
                f1=meta_f1,
                f1_score=f1_score,
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
                test.prevalence(),
                acc=np.nan,
                acc_score=np.nan,
                f1=np.nan,
                f1_score=np.nan,
            )

    return report
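What this commit changes in rca/rca_star: instead of comparing the raw RCA score directly against test accuracy, it first fits a linear regression from RCA scores to true accuracies over validation samples drawn under an APP protocol, and then maps the test sample's RCA score through that regressor. A toy sketch of the calibration step (all numbers invented):

import numpy as np
from sklearn.linear_model import LinearRegression

val_rca = np.array([0.95, 0.90, 0.80, 0.70])  # RCA scores on validation samples
val_acc = np.array([0.93, 0.88, 0.79, 0.68])  # true accuracies of those samples

reg = LinearRegression().fit(val_rca[:, np.newaxis], val_acc)
print(reg.predict(np.array([[0.85]]))[0])     # accuracy estimate for a test sample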
@@ -447,3 +556,4 @@ def kdex2(
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report
@@ -57,6 +57,8 @@ def estimate_worker(_estimate, train, validation, test, q=None):
def split_tasks(estimators, train, validation, test, q):
    _par, _seq = [], []
    for estim in estimators:
        if hasattr(estim, "nocall"):
            continue
        _task = [estim, train, validation, test]
        match estim.name:
            case n if n.endswith("_gs"):
@@ -2,7 +2,7 @@ from typing import List

import numpy as np

from quacc.evaluation import baseline, method
from quacc.evaluation import baseline, method, alt


class CompEstimatorFunc_:
@@ -40,7 +40,7 @@ class CompEstimatorName_:

class CompEstimator:
    def __get(cls, e: str | List[str], get_ref=True):
        _dict = method._methods | baseline._baselines
        _dict = alt._alts | method._methods | baseline._baselines

        match e:
            case "__all":
@@ -26,7 +26,12 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__C": np.logspace(-3, 3, 7),
                "q__classifier__class_weight": [None, "balanced"],
                "q__recalib": [None, "bcts"],
                "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
                "confidence": [
                    None,
                    ["isoft"],
                    ["max_conf", "entropy"],
                    ["max_conf", "entropy", "isoft"],
                ],
            }
        case "sld_rbf":
            _scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@@ -35,7 +40,12 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__class_weight": [None, "balanced"],
                "q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
                "q__recalib": [None, "bcts"],
                "confidence": [None, ["isoft"], ["max_conf", "entropy"]],
                "confidence": [
                    None,
                    ["isoft"],
                    ["max_conf", "entropy"],
                    ["max_conf", "entropy", "isoft"],
                ],
            }
        case "pacc":
            return {
@@ -48,7 +58,7 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__C": np.logspace(-3, 3, 7),
                "q__classifier__class_weight": [None, "balanced"],
                "q__bandwidth": np.linspace(0.01, 0.2, 20),
                "confidence": [None, ["isoft"]],
                "confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
            }
        case "kde_rbf":
            _scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@@ -57,7 +67,7 @@ def _param_grid(method, X_fit: np.ndarray):
                "q__classifier__class_weight": [None, "balanced"],
                "q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
                "q__bandwidth": np.linspace(0.01, 0.2, 20),
                "confidence": [None, ["isoft"]],
                "confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
            }
@@ -96,6 +106,15 @@ def evaluation_report(
    return report


@dataclass(frozen=True)
class EmptyMethod:
    name: str
    nocall: bool = True

    def __call__(self, c_model, validation, protocol) -> EvaluationReport:
        pass


@dataclass(frozen=True)
class EvaluationMethod:
    name: str
@@ -162,13 +181,16 @@ class EvaluationMethodGridSearch(EvaluationMethod):
            verbose=False,
            **_search_params,
        ).fit(v_train)
        return evaluation_report(
        er = evaluation_report(
            estimator=est,
            protocol=protocol,
            method_name=self.name,
        )
        er.fit_score = est.best_score()
        return er


E = EmptyMethod
M = EvaluationMethod
G = EvaluationMethodGridSearch
@@ -229,12 +251,19 @@ __sld_lr_set = [
    M("mul_sld_lr_is", __sld_lr(), "mul", conf="isoft", ),
    M("m3w_sld_lr_is", __sld_lr(), "mul", conf="isoft", cf=True),
    M("mgf_sld_lr_is", __sld_lr(), "mul", conf="isoft", gf=True),
    # sld all
    M("bin_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
    M("bgf_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], gf=True),
    M("mul_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
    M("m3w_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
    M("mgf_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("bin_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr" ),
    G("bgf_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr", gf=True),
    G("mul_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr" ),
    G("m3w_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", cf=True),
    G("mgf_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", gf=True),
    E("sld_lr_gs"),
]

__dense_sld_lr_set = [
@@ -267,12 +296,18 @@ __dense_sld_lr_set = [
    M("d_mul_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", ),
    M("d_m3w_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", cf=True),
    M("d_mgf_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", gf=True),
    # sld all
    M("d_bin_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_bgf_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    M("d_mul_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    M("d_mgf_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
    G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
    G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
    G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
    G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
    G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
    G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
    G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
    G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
    G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
]

__dense_sld_rbf_set = [
@@ -305,6 +340,12 @@ __dense_sld_rbf_set = [
    M("d_mul_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", ),
    M("d_m3w_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", cf=True),
    M("d_mgf_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", gf=True),
    # sld all
    M("d_bin_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_bgf_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    M("d_mul_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    M("d_mgf_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
    # gs sld
    G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", ),
    G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", gf=True),
@@ -334,10 +375,15 @@ __kde_lr_set = [
    M("bin_kde_lr_is", __kde_lr(), "bin", conf="isoft", ),
    M("mul_kde_lr_is", __kde_lr(), "mul", conf="isoft", ),
    M("m3w_kde_lr_is", __kde_lr(), "mul", conf="isoft", cf=True),
    # kde all
    M("bin_kde_lr_a", __kde_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
    M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
    M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
    G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
    G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
    G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
    E("kde_lr_gs"),
]

__dense_kde_lr_set = [
@@ -361,6 +407,10 @@ __dense_kde_lr_set = [
    M("d_bin_kde_lr_is", __kde_lr(), "bin", d=True, conf="isoft", ),
    M("d_mul_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", ),
    M("d_m3w_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", cf=True),
    # kde all
    M("d_bin_kde_lr_a", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_mul_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="spider" ),
    G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider" ),
@@ -388,6 +438,10 @@ __dense_kde_rbf_set = [
    M("d_bin_kde_rbf_is", __kde_rbf(), "bin", d=True, conf="isoft", ),
    M("d_mul_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", ),
    M("d_m3w_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", cf=True),
    # kde all
    M("d_bin_kde_rbf_a", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_mul_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
    M("d_m3w_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
    # gs kde
    G("d_bin_kde_rbf_gs", __kde_rbf(), "bin", d=True, pg="kde_rbf", search="spider" ),
    G("d_mul_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider" ),
@@ -1,7 +1,6 @@
import json
import pickle
from collections import defaultdict
from itertools import chain
from pathlib import Path
from typing import List, Tuple

@@ -39,6 +38,7 @@ class EvaluationReport:
        self.data: pd.DataFrame | None = None
        self.name = name if name is not None else "default"
        self.time = 0.0
        self.fit_score = None

    def append_row(self, basep: np.ndarray | Tuple, **row):
        # bp = basep[1]
@@ -89,6 +89,7 @@ class CompReport:
        train_prev: np.ndarray = None,
        valid_prev: np.ndarray = None,
        times=None,
        fit_scores=None,
        g_time=None,
    ):
        if isinstance(datas, pd.DataFrame):
@@ -105,6 +106,13 @@ class CompReport:
            .sort_index(axis=0, level=0, ascending=False, sort_remaining=False)
        )

        if fit_scores is None:
            self.fit_scores = {
                er.name: er.fit_score for er in datas if er.fit_score is not None
            }
        else:
            self.fit_scores = fit_scores

        if times is None:
            self.times = {er.name: er.time for er in datas}
        else:
@@ -114,6 +122,51 @@ class CompReport:
        self.train_prev = train_prev
        self.valid_prev = valid_prev

    def postprocess(
        self,
        f_data: pd.DataFrame,
        _data: pd.DataFrame,
        metric=None,
        estimators=None,
    ) -> pd.DataFrame:
        _mapping = {
            "sld_lr_gs": [
                "bin_sld_lr_gs",
                "mul_sld_lr_gs",
                "m3w_sld_lr_gs",
            ],
            "kde_lr_gs": [
                "bin_kde_lr_gs",
                "mul_kde_lr_gs",
                "m3w_kde_lr_gs",
            ],
        }

        for name, methods in _mapping.items():
            if estimators is not None and name not in estimators:
                continue

            if len(np.where(np.in1d(methods, self._data.columns.unique(1)))[0]) != len(
                methods
            ):
                continue

            _metric = _get_metric(metric)
            m_data = _data.loc[:, (_metric, methods)]
            _fit_scores = [(k, v) for (k, v) in self.fit_scores.items() if k in methods]
            _best_method = [k for k, v in _fit_scores][
                np.argmin([v for k, v in _fit_scores])
            ]
            _metric = (
                [_metric]
                if _metric is isinstance(_metric, str)
                else m_data.columns.unique(0)
            )
            for _m in _metric:
                f_data.loc[:, (_m, name)] = m_data.loc[:, (_m, _best_method)]

        return f_data

    @property
    def prevs(self) -> np.ndarray:
        return self.data().index.unique(0)
@@ -149,6 +202,7 @@ class CompReport:
            train_prev=self.train_prev,
            valid_prev=self.valid_prev,
            times=self.times | other.times,
            fit_scores=self.fit_scores | other.fit_scores,
            g_time=self.times["tot"] + other.times["tot"],
        )

@@ -159,7 +213,10 @@ class CompReport:
        _estimators = _get_estimators(
            estimators, self._data.loc[:, (_metric, slice(None))].columns.unique(1)
        )
        f_data: pd.DataFrame = self._data.copy().loc[:, (_metric, _estimators)]
        _data: pd.DataFrame = self._data.copy()
        f_data: pd.DataFrame = _data.loc[:, (_metric, _estimators)]

        f_data = self.postprocess(f_data, _data, metric=metric, estimators=estimators)

        if len(f_data.columns.unique(0)) == 1:
            f_data = f_data.droplevel(level=0, axis=1)
@@ -187,7 +244,11 @@ class CompReport:
        _estimators = _get_estimators(
            estimators, shift_data.loc[:, (_metric, slice(None))].columns.unique(1)
        )
        s_data: pd.DataFrame = shift_data
        shift_data: pd.DataFrame = shift_data.loc[:, (_metric, _estimators)]
        shift_data = self.postprocess(
            shift_data, s_data, metric=metric, estimators=estimators
        )

        if len(shift_data.columns.unique(0)) == 1:
            shift_data = shift_data.droplevel(level=0, axis=1)
@@ -354,17 +415,27 @@ class CompReport:
        return res


def _cr_train_prev(cr: CompReport):
    return tuple(np.around(cr.train_prev, decimals=2))


def _cr_data(cr: CompReport, metric=None, estimators=None):
    return cr.data(metric, estimators)


class DatasetReport:
    _default_dr_modes = [
        "delta_train",
        "stdev_train",
        "train_table",
        "train_std_table",
        "shift",
        "shift_table",
        "delta_test",
        "stdev_test",
        "test_table",
        "stats_table",
        "fit_scores",
    ]
    _default_cr_modes = CompReport._default_modes
@@ -380,15 +451,62 @@ class DatasetReport:

        return DatasetReport(self.name, _crs)

    def fit_scores(self, metric: str = None, estimators: List[str] = None):
        def _get_sort_idx(arr):
            return np.array([np.searchsorted(np.sort(a), a) + 1 for a in arr])

        def _get_best_idx(arr):
            return np.argmin(arr, axis=1)

        def _fdata_idx(idx) -> np.ndarray:
            return _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()

        _crs_train = [_cr_train_prev(cr) for cr in self.crs]

        for cr in self.crs:
            if not hasattr(cr, "fit_scores"):
                return None

        _crs_fit_scores = [cr.fit_scores for cr in self.crs]

        _fit_scores = pd.DataFrame(_crs_fit_scores, index=_crs_train)
        _fit_scores = _fit_scores.sort_index(axis=0, ascending=False)

        _estimators = _get_estimators(estimators, _fit_scores.columns)
        if _estimators.shape[0] == 0:
            return None

        _fdata = self.data(metric=metric, estimators=_estimators)

        # ensure that columns in _fit_scores have the same ordering of _fdata
        _fit_scores = _fit_scores.loc[:, _fdata.columns]

        _best_fit_estimators = _get_best_idx(_fit_scores.to_numpy())

        # scores = np.array(
        #     [
        #         _get_sort_idx(
        #             _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()
        #         )[:, cl].mean()
        #         for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
        #     ]
        # )
        # for idx, cl in zip(_fit_scores.index, _best_fit_estimators):
        #     print(_fdata_idx(idx)[:, cl])
        #     print(_fdata_idx(idx).min(axis=1), end="\n\n")

        scores = np.array(
            [
                np.abs(_fdata_idx(idx)[:, cl] - _fdata_idx(idx).min(axis=1)).mean()
                for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
            ]
        )

        return scores

    def data(self, metric: str = None, estimators: List[str] = None) -> pd.DataFrame:
        def _cr_train_prev(cr: CompReport):
            return tuple(np.around(cr.train_prev, decimals=2))

        def _cr_data(cr: CompReport):
            return cr.data(metric, estimators)

        _crs_sorted = sorted(
            [(_cr_train_prev(cr), _cr_data(cr)) for cr in self.crs],
            [(_cr_train_prev(cr), _cr_data(cr, metric, estimators)) for cr in self.crs],
            key=lambda cr: len(cr[1].columns),
            reverse=True,
        )
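The scores returned by fit_scores above are, per train prevalence, the mean gap between the error of the method selected via its fit score and the per-row best error. A toy sketch (values invented):

import numpy as np

errors = np.array([[0.10, 0.05, 0.20],
                   [0.08, 0.06, 0.15]])  # rows: samples, cols: methods
best_by_fit = 0                          # column picked by the lowest fit score
gap = np.abs(errors[:, best_by_fit] - errors.min(axis=1)).mean()
print(gap)                               # -> 0.035, the picked method's mean regret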
@@ -460,6 +578,15 @@ class DatasetReport:
        avg_p.loc["mean", :] = f_data.mean()
        return avg_p

    def train_std_table(self, metric: str = None, estimators: List[str] = None):
        f_data = self.data(metric=metric, estimators=estimators)
        avg_p = f_data.groupby(level=1, sort=False).mean()
        avg_p.loc["mean", :] = f_data.mean()
        avg_s = f_data.groupby(level=1, sort=False).std()
        avg_s.loc["mean", :] = f_data.std()
        avg_r = pd.concat([avg_p, avg_s], axis=1, keys=["avg", "std"])
        return avg_r

    def test_table(
        self, metric: str = None, estimators: List[str] = None
    ) -> pd.DataFrame:
@@ -591,6 +718,20 @@ class DatasetReport:
                base_path=base_path,
                backend=backend,
            )
        elif mode == "fit_scores":
            _fit_scores = self.fit_scores(metric, estimators) if data is None else data
            if _fit_scores is None:
                return None
            train_prevs = self.data(metric, estimators).index.unique(0)
            return plot.plot_fit_scores(
                train_prevs=train_prevs,
                scores=_fit_scores,
                metric=metric,
                name=conf,
                save_fig=save_fig,
                base_path=base_path,
                backend=backend,
            )

    def to_md(
        self,
@@ -42,7 +42,7 @@ class BaseAccuracyEstimator(BaseQuantifier):
        pred_proba = self.classifier.predict_proba(coll.X)

        return ExtendedCollection.from_lc(
            coll, pred_proba=pred_proba, extpol=self.extpol
            coll, pred_proba=pred_proba, ext=pred_proba, extpol=self.extpol
        )

    def _extend_instances(self, instances: np.ndarray | sp.csr_matrix):
@@ -63,6 +63,13 @@ class Threshold(ConfidenceMetric):
        _exp = scores - self.threshold
        return _exp

    # def conf(self, X, probas):
    #     scores = self.get_scores(probas)
    #     _exp = np.where(
    #         scores >= self.threshold, np.ones(scores.shape), np.zeros(scores.shape)
    #     )
    #     return _exp[:, np.newaxis]


@metric("linreg")
class LinReg(ConfidenceMetric):
@@ -242,6 +242,11 @@ class GridSearchAE(BaseAccuracyEstimator):
            return self.best_model_
        raise ValueError("best_model called before fit")

    def best_score(self):
        if hasattr(self, "best_score_"):
            return self.best_score_
        raise ValueError("best_score called before fit")


class RandomizedSearchAE(GridSearchAE):
    ERR_THRESHOLD = 1e-4
@@ -473,3 +478,4 @@ class SpiderSearchAE(GridSearchAE):
                    score += 1

        return score
@@ -1 +1,7 @@
from quacc.plot.plot import get_backend, plot_delta, plot_diagonal, plot_shift
from quacc.plot.plot import (
    get_backend,
    plot_delta,
    plot_diagonal,
    plot_shift,
    plot_fit_scores,
)
@@ -52,3 +52,16 @@ class BasePlot:
        legend=True,
    ):
        ...

    @classmethod
    def plot_fit_scores(
        train_prevs,
        scores,
        *,
        pos_class=1,
        title="default",
        x_label="prev.",
        y_label="position",
        legend=True,
    ):
        ...
@@ -142,3 +142,37 @@ def plot_shift(
        return fig, output_path

    return fig


def plot_fit_scores(
    train_prevs,
    scores,
    *,
    pos_class=1,
    metric="acc",
    name="default",
    legend=True,
    save_fig=False,
    base_path=None,
    backend=None,
):
    backend = __backend if backend is None else backend
    title = f"fit_scores_{name}_avg_{metric}"

    x_label = "train prev."
    y_label = "position"
    fig = backend.plot_fit_scores(
        train_prevs,
        scores,
        pos_class=pos_class,
        title=title,
        x_label=x_label,
        y_label=y_label,
        legend=legend,
    )

    if save_fig:
        output_path = backend.save_fig(fig, base_path, title)
        return fig, output_path

    return fig
@@ -8,10 +8,38 @@ import plotly.graph_objects as go
from quacc.plot.base import BasePlot


class PlotCfg:
    def __init__(self, mode, lwidth, font=None, legend=None, template="seaborn"):
        self.mode = mode
        self.lwidth = lwidth
        self.legend = {} if legend is None else legend
        self.font = {} if font is None else font
        self.template = template


web_cfg = PlotCfg("lines+markers", 2)
png_cfg = PlotCfg(
    "lines",
    5,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        xanchor="right",
        y=1.02,
        x=1,
        font=dict(size=24),
    ),
    font=dict(size=24),
    # template="ggplot2",
)

_cfg = png_cfg


class PlotlyPlot(BasePlot):
    __themes = defaultdict(
        lambda: {
            "template": "seaborn",
            "template": _cfg.template,
        }
    )
    __themes = __themes | {
@@ -35,7 +63,7 @@ class PlotlyPlot(BasePlot):
            case v if v > 10:
                __colors = plotly.colors.qualitative.Light24
            case _:
                __colors = plotly.colors.qualitative.Plotly
                __colors = plotly.colors.qualitative.G10

        def __generator(cs):
            while True:
@@ -50,9 +78,8 @@ class PlotlyPlot(BasePlot):
            xaxis_title=x_label,
            yaxis_title=y_label,
            template=self.theme["template"],
            font=dict(
                size=18,
            ),
            font=_cfg.font,
            legend=_cfg.legend,
        )

    def save_fig(self, fig, base_path, title) -> Path:
@@ -82,9 +109,9 @@ class PlotlyPlot(BasePlot):
                 go.Scatter(
                     x=x,
                     y=delta,
-                    mode="lines+markers",
+                    mode=_cfg.mode,
                     name=name,
-                    line=dict(color=self.hex_to_rgb(color)),
+                    line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
                     hovertemplate="prev.: %{x}<br>error: %{y:,.4f}",
                 )
             ]
@@ -193,9 +220,9 @@ class PlotlyPlot(BasePlot):
                     x=x,
                     y=delta,
                     customdata=np.stack((counts[col_idx],), axis=-1),
-                    mode="lines+markers",
+                    mode=_cfg.mode,
                     name=name,
-                    line=dict(color=self.hex_to_rgb(color)),
+                    line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
                     hovertemplate="shift: %{x}<br>error: %{y}"
                     + "<br>count: %{customdata[0]}"
                     if counts is not None
@@ -205,3 +232,29 @@ class PlotlyPlot(BasePlot):

         self.update_layout(fig, title, x_label, y_label)
         return fig
+
+    def plot_fit_scores(
+        self,
+        train_prevs,
+        scores,
+        *,
+        pos_class=1,
+        title="default",
+        x_label="prev.",
+        y_label="position",
+        legend=True,
+    ) -> go.Figure:
+        fig = go.Figure()
+        # x = train_prevs
+        x = [str(tuple(bp)) for bp in train_prevs]
+        fig.add_trace(
+            go.Scatter(
+                x=x,
+                y=scores,
+                mode="lines+markers",
+                showlegend=False,
+            ),
+        )
+
+        self.update_layout(fig, title, x_label, y_label)
+        return fig
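
A hedged sketch of calling the new backend method directly; PlotlyPlot's constructor arguments (e.g. theme selection) are not visible in this hunk, so default construction is assumed:

plot = PlotlyPlot()                        # assumed default construction
fig = plot.plot_fit_scores(
    [(0.1, 0.9), (0.5, 0.5), (0.9, 0.1)],  # rendered as categorical str(tuple(...)) x labels
    [2, 1, 3],                             # one score per training prevalence
    title="fit_scores_imdb_avg_acc",
    x_label="train prev.",
    y_label="position",
)
fig.show()                                 # or persist via the backend's save_fig()
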
remote.log (6775 lines): file diff suppressed because it is too large.
@@ -0,0 +1,116 @@
+abstention==0.1.3.1 ; python_version >= "3.10" and python_version < "4.0"
+ansi2html==1.9.1 ; python_version >= "3.10" and python_version < "4.0"
+appnope==0.1.3 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Darwin"
+asttokens==2.4.1 ; python_version >= "3.10" and python_version < "4.0"
+bcrypt==4.1.2 ; python_version >= "3.10" and python_version < "4.0"
+bleach==6.1.0 ; python_version >= "3.10" and python_version < "4.0"
+blinker==1.7.0 ; python_version >= "3.10" and python_version < "4.0"
+bokeh==3.3.4 ; python_version >= "3.10" and python_version < "4.0"
+certifi==2023.11.17 ; python_version >= "3.10" and python_version < "4.0"
+cffi==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
+charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0"
+click==8.1.7 ; python_version >= "3.10" and python_version < "4.0"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and sys_platform == "win32" or python_version >= "3.10" and python_version < "4" and platform_system == "Windows"
+comm==0.2.1 ; python_version >= "3.10" and python_version < "4.0"
+contourpy==1.2.0 ; python_version >= "3.10" and python_version < "4"
+coverage[toml]==7.4.1 ; python_version >= "3.10" and python_version < "4.0"
+cryptography==42.0.1 ; python_version >= "3.10" and python_version < "4.0"
+cycler==0.12.1 ; python_version >= "3.10" and python_version < "4"
+dash-bootstrap-components==1.5.0 ; python_version >= "3.10" and python_version < "4"
+dash-core-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash-html-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash-table==5.0.0 ; python_version >= "3.10" and python_version < "4.0"
+dash==2.14.2 ; python_version >= "3.10" and python_version < "4.0"
+debugpy==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
+decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"
+exceptiongroup==1.2.0 ; python_version >= "3.10" and python_version < "3.11"
+executing==2.0.1 ; python_version >= "3.10" and python_version < "4.0"
+flask==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+fonttools==4.47.2 ; python_version >= "3.10" and python_version < "4"
+gunicorn==21.2.0 ; python_version >= "3.10" and python_version < "4.0"
+idna==3.6 ; python_version >= "3.10" and python_version < "4.0"
+importlib-metadata==7.0.1 ; python_version >= "3.10" and python_version < "4.0"
+iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
+ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "4.0"
+ipympl==0.9.3 ; python_version >= "3.10" and python_version < "4.0"
+ipython-genutils==0.2.0 ; python_version >= "3.10" and python_version < "4.0"
+ipython==8.20.0 ; python_version >= "3.10" and python_version < "4.0"
+ipywidgets-bokeh==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
+ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "4.0"
+itsdangerous==2.1.2 ; python_version >= "3.10" and python_version < "4.0"
+jedi==0.19.1 ; python_version >= "3.10" and python_version < "4.0"
+jinja2==3.1.3 ; python_version >= "3.10" and python_version < "4.0"
+joblib==1.3.2 ; python_version >= "3.10" and python_version < "4"
+jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "4.0"
+jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "4.0"
+jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "4.0"
+kiwisolver==1.4.5 ; python_version >= "3.10" and python_version < "4"
+linkify-it-py==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
+logging==0.4.9.6 ; python_version >= "3.10" and python_version < "4.0"
+markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
+markdown==3.5.2 ; python_version >= "3.10" and python_version < "4.0"
+markupsafe==2.1.4 ; python_version >= "3.10" and python_version < "4.0"
+matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "4.0"
+matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "4"
+mdit-py-plugins==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
+mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0"
+nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "4.0"
+numpy==1.26.3 ; python_version >= "3.10" and python_version < "4.0"
+packaging==23.2 ; python_version >= "3.10" and python_version < "4.0"
+pandas-stubs==2.1.4.231227 ; python_version >= "3.10" and python_version < "4.0"
+pandas==2.2.0 ; python_version >= "3.10" and python_version < "4.0"
+panel==1.3.8 ; python_version >= "3.10" and python_version < "4.0"
+param==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
+paramiko==3.4.0 ; python_version >= "3.10" and python_version < "4.0"
+parso==0.8.3 ; python_version >= "3.10" and python_version < "4.0"
+pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
+pillow==10.2.0 ; python_version >= "3.10" and python_version < "4.0"
+platformdirs==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
+plotly==5.18.0 ; python_version >= "3.10" and python_version < "4.0"
+pluggy==1.4.0 ; python_version >= "3.10" and python_version < "4.0"
+prompt-toolkit==3.0.43 ; python_version >= "3.10" and python_version < "4.0"
+psutil==5.9.8 ; python_version >= "3.10" and python_version < "4.0"
+ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
+pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "4.0"
+pyarrow==15.0.0 ; python_version >= "3.10" and python_version < "4.0"
+pycparser==2.21 ; python_version >= "3.10" and python_version < "4.0"
+pygments==2.17.2 ; python_version >= "3.10" and python_version < "4.0"
+pylance==0.5.10 ; python_version >= "3.10" and python_version < "4.0"
+pynacl==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
+pyparsing==3.1.1 ; python_version >= "3.10" and python_version < "4"
+pytest-cov==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
+pytest-mock==3.12.0 ; python_version >= "3.10" and python_version < "4.0"
+pytest==8.0.0 ; python_version >= "3.10" and python_version < "4.0"
+python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "4.0"
+pytz==2023.4 ; python_version >= "3.10" and python_version < "4.0"
+pyviz-comms==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "4.0"
+pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0"
+pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "4.0"
+quapy==0.1.7 ; python_version >= "3.10" and python_version < "4"
+requests==2.31.0 ; python_version >= "3.10" and python_version < "4.0"
+retrying==1.3.4 ; python_version >= "3.10" and python_version < "4.0"
+scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "4"
+scipy==1.12.0 ; python_version >= "3.10" and python_version < "4.0"
+setuptools==69.0.3 ; python_version >= "3.10" and python_version < "4.0"
+six==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
+stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0"
+tabulate==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
+tenacity==8.2.3 ; python_version >= "3.10" and python_version < "4.0"
+threadpoolctl==3.2.0 ; python_version >= "3.10" and python_version < "4"
+tomli==2.0.1 ; python_version >= "3.10" and python_full_version <= "3.11.0a6"
+tornado==6.4 ; python_version >= "3.10" and python_version < "4.0"
+tqdm==4.66.1 ; python_version >= "3.10" and python_version < "4"
+traitlets==5.14.1 ; python_version >= "3.10" and python_version < "4.0"
+types-pytz==2023.4.0.20240130 ; python_version >= "3.10" and python_version < "4.0"
+typing-extensions==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
+tzdata==2023.4 ; python_version >= "3.10" and python_version < "4.0"
+uc-micro-py==1.0.2 ; python_version >= "3.10" and python_version < "4.0"
+urllib3==2.1.0 ; python_version >= "3.10" and python_version < "4.0"
+wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4.0"
+webencodings==0.5.1 ; python_version >= "3.10" and python_version < "4.0"
+werkzeug==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
+widgetsnbextension==4.0.9 ; python_version >= "3.10" and python_version < "4.0"
+xlrd==2.0.1 ; python_version >= "3.10" and python_version < "4"
+xyzservices==2023.10.1 ; python_version >= "3.10" and python_version < "4.0"
+zipp==3.17.0 ; python_version >= "3.10" and python_version < "4.0"
@@ -0,0 +1,9 @@
+from quacc.evaluation.report import DatasetReport
+
+dr = DatasetReport.unpickle("output/main/imdb/imdb.pickle")
+_estimators = ["sld_lr_gs", "bin_sld_lr_gs", "mul_sld_lr_gs", "m3w_sld_lr_gs"]
+_data = dr.data(metric="acc", estimators=_estimators)
+for idx, cr in zip(_data.index.unique(0), dr.crs[::-1]):
+    print(cr.train_prev)
+    print({k: v for k, v in cr.fit_scores.items() if k in _estimators})
+    print(_data.loc[(idx, slice(None), slice(None)), :])
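
A hedged follow-up: the fit_scores printed by the script above are the natural input to the new plot_fit_scores wrapper. The aggregation below (one scalar score per training prevalence, per estimator) is an assumption about the structure of cr.fit_scores, not code from this commit:

from quacc.plot import plot_fit_scores

train_prevs = [cr.train_prev for cr in dr.crs[::-1]]
for est in _estimators:
    # assumes cr.fit_scores[est] is a single score per training prevalence
    scores = [cr.fit_scores[est] for cr in dr.crs[::-1]]
    plot_fit_scores(train_prevs, scores, metric="acc", name=f"imdb_{est}", save_fig=False)
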