This commit is contained in:
Lorenzo Volpi 2024-01-30 13:56:17 +01:00
parent 6bf2fb9e1b
commit 2d8d4c3c68
26 changed files with 11884 additions and 622 deletions

View File

@ -0,0 +1,90 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
import quapy as qp
from method.kdey import KDEyML, KDEyCS, KDEyHD
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy
datasets = qp.datasets.UCI_DATASETS
# target = 'f1'
target = 'acc'
errors = []
# dataset_name = datasets[-2]
for dataset_name in datasets:
    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
        continue

    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    train, val = train.split_stratified(random_state=0)
    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    for sample in APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')():
        print('='*80)
        y_hat = cls.predict(sample.instances)
        y = sample.labels
        if target == 'acc':
            acc = (y_hat==y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        q = EMQ(cls)
        q.fit(train, fit_classifier=False)
        # q = EMQ(cls)
        # q.fit(train, val_split=val, fit_classifier=False)

        M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)

        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        tp = cont_table_hat[1,1]
        tn = cont_table_hat[0,0]
        fn = cont_table_hat[0,1]
        fp = cont_table_hat[1,0]

        if target == 'acc':
            acc_hat = (tp+tn)
        else:
            den = (2*tp + fn + fp)
            if den > 0:
                acc_hat = 2*tp / den
            else:
                acc_hat = 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
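
For reference, the estimation step in the script above reduces to a small numpy computation. The following is a minimal self-contained sketch with made-up numbers (p_hat and M_hat are hypothetical, not taken from any dataset above) showing how an estimated test prevalence and a validation misclassification matrix combine into an estimated contingency table, and from it an accuracy and F1 estimate:

import numpy as np

# hypothetical estimates: test-class prevalences and P(hat_y=i | y=j) from validation
p_hat = np.array([0.4, 0.6])            # [P(y=0), P(y=1)]
M_hat = np.array([[0.9, 0.2],           # rows: predicted class, columns: true class
                  [0.1, 0.8]])

cont_table = p_hat * M_hat              # entry [i, j] ~ P(hat_y=i, y=j); entries sum to 1
tn, fn = cont_table[0, 0], cont_table[0, 1]
fp, tp = cont_table[1, 0], cont_table[1, 1]

acc_hat = tp + tn                       # estimated accuracy (0.84 here)
f1_hat = 2 * tp / (2 * tp + fn + fp)    # estimated F1 (~0.857 here)
print(acc_hat, f1_hat)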

View File

@ -0,0 +1,269 @@
import numpy as np
import scipy.special
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
import quapy as qp
from quapy.protocol import APP
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
from sklearn import clone
import quapy.functional as F
# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']
# target = 'f1'
target = 'acc'
errors = []
def method_1(cls, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # q = EMQ(LogisticRegression(class_weight='balanced'))
    # q.fit(val, fit_classifier=True)

    q = EMQ(cls)
    q.fit(train, fit_classifier=False)

    # q = KDEyML(cls)
    # q.fit(train, val_split=val, fit_classifier=False)

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)

    p_hat = q.quantify(sample.instances)
    cont_table_hat = p_hat * M_hat
    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
    # cont_table_hat = cont_table_hat / cont_table_hat.sum()

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)
    print('cont_table Sum :\n', cont_table_hat.sum())

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp
def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Let P and Q be the training and test distributions. The method solves the following
    system of linear equations:

        tp + fp = CC                (the classify & count estimate, observed)
        fn + tp = Q(Y=1)            (not observed, estimated via quantification)
        tp + fp + fn + tn = 1       (trivial)

    There are 4 unknowns and only 3 equations. The fourth equation is obtained by
    assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that this
    implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we assume that the tpr in P (estimated on validation, hereafter tpr)
    and the tpr in Q (unknown, hereafter tpr_Q) coincide. This means:

        tpr = tpr_Q = tp / (tp + fn)

    which, after some manipulation, becomes our last equation:

        tp (tpr - 1) + fn (tpr) = 0

    Note that this equation relies on the estimate of tpr, which is likely to be more
    reliable the more positives we have. This suggests that, when the validation set
    contains more negatives than positives, it might be convenient to resort to the
    true negative rate (tnr) instead, giving rise to the alternative fourth equation:

        tn (tnr - 1) + fp (tnr) = 0

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
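    # Worked instance of the system above (hypothetical numbers): with cc_prev = 0.55,
    # pos_prev = 0.60 and tpr = 0.8, the equations
    #   tp + fp = 0.55,   fn + tp = 0.60,   tn + fn + fp + tp = 1,   tp*(0.8 - 1) + fn*0.8 = 0
    # yield tp = 0.48, fn = 0.12, fp = 0.07, tn = 0.33 (all non-negative and summing to 1).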
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]
    # pos_prev = sample.prevalence()[1]

    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
    cont_table_true = sample.prevalence() * M_true

    if val.prevalence()[1] > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        tpr_hat = M_hat[1, 1]
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        tnr_hat = M_hat[0, 0]
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat - 1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [cc_prev, pos_prev, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    # if (cont_table_estim < 0).any() or (cont_table_estim > 1).any():
    #     cont_table_estim = scipy.special.softmax(cont_table_estim)

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)
    # print('true_tpr', M_true[1,1])
    # print('estim_tpr', tpr_hat)

    return tn, fn, fp, tp
def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method_2, but without involving any of QuaPy's quantifiers.

    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """
    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)

    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]

    tpr_hat = M_hat[1, 1]
    fpr_hat = M_hat[1, 0]
    tnr_hat = M_hat[0, 0]
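    # ACC-style prevalence correction: inverts pos_prev_cc = tpr*p + fpr*(1-p) for the
    # positive prevalence p, then clips the estimate to [0, 1]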
    pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
    pos_prev_val = val.prevalence()[1]

    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [0, tpr_hat, 0, tpr_hat - 1]
        ])
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        A = np.asarray([
            [0, 0, 1, 1],
            [0, 1, 0, 1],
            [1, 1, 1, 1],
            [tnr_hat - 1, 0, tnr_hat, 0]
        ])

    b = np.asarray(
        [pos_prev_cc, pos_prev_test_hat, 1, 0]
    )

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp
def cls_eval_from_counters(tn, fn, fp, tp):
    if target == 'acc':
        acc_hat = (tp + tn)
    else:
        den = (2 * tp + fn + fp)
        if den > 0:
            acc_hat = 2 * tp / den
        else:
            acc_hat = 0
    return acc_hat


def cls_eval_from_labels(y, y_hat):
    if target == 'acc':
        acc = (y_hat == y).mean()
    else:
        acc = f1_score(y, y_hat, zero_division=0)
    return acc
for dataset_name in datasets:
    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
    for train in train_prot():
        if np.product(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced')

        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        for sample in APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')():
            print('='*80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels
            acc_true = cls_eval_from_labels(y, y_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)
            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')

164
conf.yaml
View File

@ -5,47 +5,80 @@ debug_conf: &debug_conf
OUT_DIR_NAME: output/debug
DATASET_N_PREVS: 9
COMP_ESTIMATORS:
- bin_sld_lr
- mul_sld_lr
- m3w_sld_lr
- d_bin_sld_lr
- d_mul_sld_lr
- d_m3w_sld_lr
- d_bin_sld_rbf
- d_mul_sld_rbf
- d_m3w_sld_rbf
- bin_kde_lr
- mul_kde_lr
- m3w_kde_lr
- d_bin_kde_lr
- d_mul_kde_lr
- d_m3w_kde_lr
- d_bin_kde_rbf
- d_mul_kde_rbf
- d_m3w_kde_rbf
# - bin_sld_lr
# - mul_sld_lr
# - m3w_sld_lr
# - d_bin_sld_lr
# - d_mul_sld_lr
# - d_m3w_sld_lr
# - d_bin_sld_rbf
# - d_mul_sld_rbf
# - d_m3w_sld_rbf
# - bin_kde_lr
# - mul_kde_lr
# - m3w_kde_lr
# - d_bin_kde_lr
# - d_mul_kde_lr
# - d_m3w_kde_lr
# - d_bin_kde_rbf
# - d_mul_kde_rbf
# - d_m3w_kde_rbf
# - mandoline
# - rca
- bin_sld_lr_is
- mul_sld_lr_is
- m3w_sld_lr_is
- rca
- rca_star
- doc
- atc_mc
N_JOBS: -2
confs:
- DATASET_NAME: imdb
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
other_confs:
- DATASET_NAME: twitter_gasp
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
test_conf: &test_conf
global:
METRICS:
- acc
- f1
OUT_DIR_NAME: output/test
DATASET_N_PREVS: 9
COMP_ESTIMATORS:
- cross
- cross2
- bin_sld_lr
- mul_sld_lr
- m3w_sld_lr
- bin_sld_lr_is
- mul_sld_lr_is
- m3w_sld_lr_is
- doc
- atc_mc
N_JOBS: -2
confs:
- DATASET_NAME: imdb
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
other_confs:
- DATASET_NAME: twitter_gasp
main:
confs: &main_confs
- DATASET_NAME: rcv1
DATASET_TARGET: MCAT
DATASET_TARGET: CCAT
other_confs:
- DATASET_NAME: imdb
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
- DATASET_NAME: rcv1
DATASET_TARGET: GCAT
- DATASET_NAME: rcv1
DATASET_TARGET: MCAT
sld_lr_conf: &sld_lr_conf
@ -72,6 +105,9 @@ sld_lr_conf: &sld_lr_conf
- bin_sld_lr_is
- mul_sld_lr_is
- m3w_sld_lr_is
- bin_sld_lr_a
- mul_sld_lr_a
- m3w_sld_lr_a
- bin_sld_lr_gs
- mul_sld_lr_gs
- m3w_sld_lr_gs
@ -116,6 +152,9 @@ d_sld_lr_conf: &d_sld_lr_conf
- d_bin_sld_lr_is
- d_mul_sld_lr_is
- d_m3w_sld_lr_is
- d_bin_sld_lr_a
- d_mul_sld_lr_a
- d_m3w_sld_lr_a
- d_bin_sld_lr_gs
- d_mul_sld_lr_gs
- d_m3w_sld_lr_gs
@ -160,6 +199,9 @@ d_sld_rbf_conf: &d_sld_rbf_conf
- d_bin_sld_rbf_is
- d_mul_sld_rbf_is
- d_m3w_sld_rbf_is
- d_bin_sld_rbf_a
- d_mul_sld_rbf_a
- d_m3w_sld_rbf_a
- d_bin_sld_rbf_gs
- d_mul_sld_rbf_gs
- d_m3w_sld_rbf_gs
@ -202,6 +244,9 @@ kde_lr_conf: &kde_lr_conf
- bin_kde_lr_is
- mul_kde_lr_is
- m3w_kde_lr_is
- bin_kde_lr_a
- mul_kde_lr_a
- m3w_kde_lr_a
- bin_kde_lr_gs
- mul_kde_lr_gs
- m3w_kde_lr_gs
@ -238,6 +283,9 @@ d_kde_lr_conf: &d_kde_lr_conf
- d_bin_kde_lr_is
- d_mul_kde_lr_is
- d_m3w_kde_lr_is
- d_bin_kde_lr_a
- d_mul_kde_lr_a
- d_m3w_kde_lr_a
- d_bin_kde_lr_gs
- d_mul_kde_lr_gs
- d_m3w_kde_lr_gs
@ -274,6 +322,9 @@ d_kde_rbf_conf: &d_kde_rbf_conf
- d_bin_kde_rbf_is
- d_mul_kde_rbf_is
- d_m3w_kde_rbf_is
- d_bin_kde_rbf_a
- d_mul_kde_rbf_a
- d_m3w_kde_rbf_a
- d_bin_kde_rbf_gs
- d_mul_kde_rbf_gs
- d_m3w_kde_rbf_gs
@ -287,5 +338,72 @@ d_kde_rbf_conf: &d_kde_rbf_conf
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
baselines_conf: &baselines_conf
global:
METRICS:
- acc
- f1
OUT_DIR_NAME: output/baselines
DATASET_N_PREVS: 9
COMP_ESTIMATORS:
- doc
- atc_mc
- mandoline
- rca
- rca_star
N_JOBS: -2
exec: *d_sld_rbf_conf
confs: *main_confs
other_confs:
- DATASET_NAME: imdb
- DATASET_NAME: rcv1
DATASET_TARGET: CCAT
kde_lr_gs_conf: &kde_lr_gs_conf
global:
METRICS:
- acc
- f1
OUT_DIR_NAME: output/kde_lr_gs
DATASET_N_PREVS: 9
COMP_ESTIMATORS:
- bin_kde_lr_gs
- mul_kde_lr_gs
- m3w_kde_lr_gs
N_JOBS: -2
confs: *main_confs
timing_conf: &timing_conf
global:
METRICS:
- acc
- f1
OUT_DIR_NAME: output/timing
DATASET_N_PREVS: 1
COMP_ESTIMATORS:
- bin_sld_lr_a
- mul_sld_lr_a
- m3w_sld_lr_a
- bin_kde_lr_a
- mul_kde_lr_a
- m3w_kde_lr_a
- bin_sld_lr_gs
- mul_sld_lr_gs
- m3w_sld_lr_gs
- bin_kde_lr_gs
- mul_kde_lr_gs
- m3w_kde_lr_gs
- doc
- atc_mc
- rca
- rca_star
- mandoline
N_JOBS: 1
PROTOCOL_N_PREVS: 1
PROTOCOL_REPEATS: 1
SAMPLE_SIZE: 1000
confs: *main_confs
exec: *kde_lr_gs_conf

11
copy_source.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
CMD="cp"
DEST="~/tesi_docker/"
bash -c "${CMD} -r quacc ${DEST}"
bash -c "${CMD} -r baselines ${DEST}"
bash -c "${CMD} run.py ${DEST}"
bash -c "${CMD} remote.py ${DEST}"
bash -c "${CMD} conf.yaml ${DEST}"
bash -c "${CMD} requirements.txt ${DEST}"

8
log Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
if [[ "${1}" == "r" ]]; then
scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
else
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
fi

1102
poetry.lock generated

File diff suppressed because it is too large

View File

@ -13,6 +13,7 @@ jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"
abstention = "^0.1.3.1"
pytest = "^8.0.0"
[tool.poetry.scripts]
main = "quacc.main:main"
@ -34,21 +35,20 @@ dash = "gunicorn qcdash.app:server -b ilona.isti.cnr.it:33421"
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 0 -f /home/volpi/tesi/quacc.log >> ~/tesi/remote.log
"""
[tool.poe.tasks.logrf]
shell = """
scp {$HOST}:~/tesi/quacc.log ~/tesi/remote.log &> /dev/null
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat --paging=never --language log
ssh {$HOST} tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
"""
[tool.poe.tasks.logf]
shell = """
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
"""
interpreter = "fish"
env = { HOST = "volpi@ilona.isti.cnr.it" }
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"

View File

@ -85,6 +85,8 @@ def get_table(dr: DatasetReport, metric, estimators, view, mode):
case ("avg", "train_table"):
# return dr.data(metric=metric, estimators=estimators).groupby(level=1).mean()
return dr.train_table(metric=metric, estimators=estimators)
case ("avg", "train_std_table"):
return dr.train_std_table(metric=metric, estimators=estimators)
case ("avg", "test_table"):
# return dr.data(metric=metric, estimators=estimators).groupby(level=0).mean()
return dr.test_table(metric=metric, estimators=estimators)
@ -121,24 +123,44 @@ def get_DataTable(df, mode):
_index_name = dict(
train_table="test prev.",
train_std_table="train prev.",
test_table="train prev.",
shift_table="shift",
stats_table="method",
)
df = df.reset_index()
if mode == "train_std_table":
columns_format = Format()
df_columns = np.concatenate([["index"], df.columns.unique(1)[1:]])
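# each data cell of the train_std_table view is rendered as "mean~std", pairing the
# ("avg", k) and ("std", k) columns produced by DatasetReport.train_std_table()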
data = [
dict(
index="(" + ", ".join([f"{v:.2f}" for v in idx]) + ")"
if isinstance(idx, tuple | list | np.ndarray)
else str(idx)
)
| {
k: f"{df.loc[i,('avg',k)]:.4f}~{df.loc[i,('std',k)]:.3f}"
for k in df.columns.unique(1)[1:]
}
for i, idx in zip(df.index, df.loc[:, ("index", "")])
]
else:
columns_format = Format(precision=6, scheme=Scheme.exponent, nully="nan")
df_columns = df.columns
data = df.to_dict("records")
columns = {
c: dict(
id=c,
name=_index_name[mode] if c == "index" else c,
type="numeric",
format=Format(precision=6, scheme=Scheme.exponent, nully="nan"),
format=columns_format,
)
for c in df.columns
for c in df_columns
}
# columns["index"]["format"] = Format(precision=2, scheme=Scheme.fixed)
columns["index"]["format"] = Format()
columns = list(columns.values())
data = df.to_dict("records")
for d in data:
if isinstance(d["index"], tuple | list | np.ndarray):
d["index"] = "(" + ", ".join([f"{v:.2f}" for v in d["index"]]) + ")"

3223
quacc.log

File diff suppressed because it is too large

View File

@ -320,25 +320,59 @@ def rcv1_info():
n_train = 23149
targets = []
for target in range(103):
train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
for target in ["CCAT", "MCAT", "GCAT"]:
target_index = np.where(dataset.target_names == target)[0]
train_t_prev = np.average(
dataset.target[:n_train, target_index].toarray().flatten()
)
test_t_prev = np.average(
dataset.target[n_train:, target_index].toarray().flatten()
)
d = Dataset(name="rcv1", target=target)()[0]
targets.append(
(
dataset.target_names[target],
target,
{
"train": (1.0 - train_t_prev, train_t_prev),
"test": (1.0 - test_t_prev, test_t_prev),
"train_size": len(d.train),
"val_size": len(d.validation),
"test_size": len(d.test),
},
)
)
targets.sort(key=lambda t: t[1]["train"][1])
for n, d in targets:
print(f"{n}:")
for k, (fp, tp) in d.items():
print(f"\t{k}: {fp:.4f}, {tp:.4f}")
for k, v in d.items():
if isinstance(v, tuple):
print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
else:
print(f"\t{k}: {v}")
def imdb_info():
train, test = qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test
train_t_prev = train.prevalence()
test_t_prev = test.prevalence()
dst = Dataset(name="imdb")()[0]
d = {
"train": (train_t_prev[0], train_t_prev[1]),
"test": (test_t_prev[0], test_t_prev[1]),
"train_size": len(dst.train),
"val_size": len(dst.validation),
"test_size": len(dst.test),
}
print("imdb:")
for k, v in d.items():
if isinstance(v, tuple):
print(f"\t{k}: {v[0]:.4f}, {v[1]:.4f}")
else:
print(f"\t{k}: {v}")
if __name__ == "__main__":
fetch_cifar100()
rcv1_info()
imdb_info()

115
quacc/evaluation/alt.py Normal file
View File

@ -0,0 +1,115 @@
from functools import wraps

import numpy as np
import quapy.functional as F
import sklearn.metrics as metrics
from quapy.method.aggregative import ACC, EMQ
from sklearn import clone
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport

_alts = {}


def alt(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    wrapper.name = func.__name__
    _alts[func.__name__] = wrapper

    return wrapper


@alt
def cross(c_model, validation, protocol):
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)

    qcls = clone(c_model)
    qcls.fit(*validation.Xy)

    er = EvaluationReport(name="cross")
    for sample in protocol():
        y_hat = c_model.predict(sample.instances)
        y = sample.labels
        ground_acc = (y_hat == y).mean()
        ground_f1 = metrics.f1_score(y, y_hat, zero_division=0)

        q = EMQ(qcls)
        q.fit(validation, fit_classifier=False)

        M_hat = ACC.getPteCondEstim(validation.classes_, y_val, y_hat_val)
        p_hat = q.quantify(sample.instances)
        cont_table_hat = p_hat * M_hat

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er


@alt
def cross2(c_model, validation, protocol):
    classes = validation.classes_
    y_val = validation.labels
    y_hat_val = c_model.predict(validation.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    pos_prev_val = validation.prevalence()[1]

    er = EvaluationReport(name="cross2")
    for sample in protocol():
        y_test = sample.labels
        y_hat_test = c_model.predict(sample.instances)
        ground_acc = (y_hat_test == y_test).mean()
        ground_f1 = metrics.f1_score(y_test, y_hat_test, zero_division=0)

        pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
        tpr_hat = M_hat[1, 1]
        fpr_hat = M_hat[1, 0]
        tnr_hat = M_hat[0, 0]
        pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
        pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)

        if pos_prev_val > 0.5:
            # in this case, the tpr might be a more reliable estimate than tnr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [0, tpr_hat, 0, tpr_hat - 1]]
            )
        else:
            # in this case, the tnr might be a more reliable estimate than tpr
            A = np.asarray(
                [[0, 0, 1, 1], [0, 1, 0, 1], [1, 1, 1, 1], [tnr_hat - 1, 0, tnr_hat, 0]]
            )

        b = np.asarray([pos_prev_cc, pos_prev_test_hat, 1, 0])
        tn, fn, fp, tp = np.linalg.solve(A, b)
        cont_table_hat = np.array([[tn, fp], [fn, tp]])

        acc_score = qc.error.acc(cont_table_hat)
        f1_score = qc.error.f1(cont_table_hat)

        meta_acc = abs(acc_score - ground_acc)
        meta_f1 = abs(f1_score - ground_f1)
        er.append_row(
            sample.prevalence(),
            acc=meta_acc,
            f1=meta_f1,
            acc_score=acc_score,
            f1_score=f1_score,
        )

    return er

View File

@ -288,21 +288,76 @@ def rca(
):
"""elsahar19"""
c_model_predict = getattr(c_model, predict_method)
val_pred1 = c_model_predict(validation.X)
f1_average = "binary" if validation.n_classes == 2 else "macro"
val1, val2 = validation.split_stratified(train_prop=0.5, random_state=env._R_SEED)
val1_pred1 = c_model_predict(val1.X)
val2_protocol = APP(
val2,
n_prevalences=21,
repeats=100,
return_type="labelled_collection",
)
val2_rca = []
val2_prot_preds = []
val2_prot_y = []
for v2 in val2_protocol():
_preds = c_model_predict(v2.X)
try:
c_model2 = clone_fit(c_model, v2.X, _preds)
c_model2_predict = getattr(c_model2, predict_method)
val1_pred2 = c_model2_predict(val1.X)
rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
val2_rca.append(rca_score)
val2_prot_preds.append(_preds)
val2_prot_y.append(v2.y)
except ValueError:
pass
val_targets_acc = np.array(
[
metrics.accuracy_score(v2_y, v2_preds)
for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
]
)
reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
val_targets_f1 = np.array(
[
metrics.f1_score(v2_y, v2_preds, average=f1_average)
for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
]
)
reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)
report = EvaluationReport(name="rca")
for test in protocol():
try:
test_pred = c_model_predict(test.X)
c_model2 = clone_fit(c_model, test.X, test_pred)
test_preds = c_model_predict(test.X)
c_model2 = clone_fit(c_model, test.X, test_preds)
c_model2_predict = getattr(c_model2, predict_method)
val_pred2 = c_model2_predict(validation.X)
rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
val1_pred2 = c_model2_predict(val1.X)
rca_score = 1.0 - rcalib.get_score(val1_pred1, val1_pred2, val1.y)
acc_score = reg_acc.predict(np.array([[rca_score]]))[0]
f1_score = reg_f1.predict(np.array([[rca_score]]))[0]
meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
meta_f1 = abs(
f1_score - metrics.f1_score(test.y, test_preds, average=f1_average)
)
report.append_row(
test.prevalence(),
acc=meta_acc,
acc_score=acc_score,
f1=meta_f1,
f1_score=f1_score,
)
except ValueError:
report.append_row(
test.prevalence(), acc=float("nan"), acc_score=float("nan")
test.prevalence(),
acc=np.nan,
acc_score=np.nan,
f1=np.nan,
f1_score=np.nan,
)
return report
@ -317,13 +372,56 @@ def rca_star(
):
"""elsahar19"""
c_model_predict = getattr(c_model, predict_method)
validation1, validation2 = validation.split_stratified(
f1_average = "binary" if validation.n_classes == 2 else "macro"
validation1, val2 = validation.split_stratified(
train_prop=0.5, random_state=env._R_SEED
)
val1_pred = c_model_predict(validation1.X)
c_model1 = clone_fit(c_model, validation1.X, val1_pred)
val11, val12 = validation1.split_stratified(
train_prop=0.5, random_state=env._R_SEED
)
val11_pred = c_model_predict(val11.X)
c_model1 = clone_fit(c_model, val11.X, val11_pred)
c_model1_predict = getattr(c_model1, predict_method)
val2_pred1 = c_model1_predict(validation2.X)
val12_pred1 = c_model1_predict(val12.X)
val2_protocol = APP(
val2,
n_prevalences=21,
repeats=100,
return_type="labelled_collection",
)
val2_rca = []
val2_prot_preds = []
val2_prot_y = []
for v2 in val2_protocol():
_preds = c_model_predict(v2.X)
try:
c_model2 = clone_fit(c_model, v2.X, _preds)
c_model2_predict = getattr(c_model2, predict_method)
val12_pred2 = c_model2_predict(val12.X)
rca_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
val2_rca.append(rca_score)
val2_prot_preds.append(_preds)
val2_prot_y.append(v2.y)
except ValueError:
pass
val_targets_acc = np.array(
[
metrics.accuracy_score(v2_y, v2_preds)
for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
]
)
reg_acc = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_acc)
val_targets_f1 = np.array(
[
metrics.f1_score(v2_y, v2_preds, average=f1_average)
for v2_y, v2_preds in zip(val2_prot_y, val2_prot_preds)
]
)
reg_f1 = LinearRegression().fit(np.array(val2_rca)[:, np.newaxis], val_targets_f1)
report = EvaluationReport(name="rca_star")
for test in protocol():
@ -331,17 +429,28 @@ def rca_star(
test_pred = c_model_predict(test.X)
c_model2 = clone_fit(c_model, test.X, test_pred)
c_model2_predict = getattr(c_model2, predict_method)
val2_pred2 = c_model2_predict(validation2.X)
rca_star_score = 1.0 - rcalib.get_score(
val2_pred1, val2_pred2, validation2.y
val12_pred2 = c_model2_predict(val12.X)
rca_star_score = 1.0 - rcalib.get_score(val12_pred1, val12_pred2, val12.y)
acc_score = reg_acc.predict(np.array([[rca_star_score]]))[0]
f1_score = reg_f1.predict(np.array([[rca_star_score]]))[0]
meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_pred))
meta_f1 = abs(
f1_score - metrics.f1_score(test.y, test_pred, average=f1_average)
)
meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
report.append_row(
test.prevalence(), acc=meta_score, acc_score=rca_star_score
test.prevalence(),
acc=meta_acc,
acc_score=acc_score,
f1=meta_f1,
f1_score=f1_score,
)
except ValueError:
report.append_row(
test.prevalence(), acc=float("nan"), acc_score=float("nan")
test.prevalence(),
acc=np.nan,
acc_score=np.nan,
f1=np.nan,
f1_score=np.nan,
)
return report
@ -447,3 +556,4 @@ def kdex2(
report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
return report

View File

@ -57,6 +57,8 @@ def estimate_worker(_estimate, train, validation, test, q=None):
def split_tasks(estimators, train, validation, test, q):
_par, _seq = [], []
for estim in estimators:
if hasattr(estim, "nocall"):
continue
_task = [estim, train, validation, test]
match estim.name:
case n if n.endswith("_gs"):

View File

@ -2,7 +2,7 @@ from typing import List
import numpy as np
from quacc.evaluation import baseline, method
from quacc.evaluation import baseline, method, alt
class CompEstimatorFunc_:
@ -40,7 +40,7 @@ class CompEstimatorName_:
class CompEstimator:
def __get(cls, e: str | List[str], get_ref=True):
_dict = method._methods | baseline._baselines
_dict = alt._alts | method._methods | baseline._baselines
match e:
case "__all":

View File

@ -26,7 +26,12 @@ def _param_grid(method, X_fit: np.ndarray):
"q__classifier__C": np.logspace(-3, 3, 7),
"q__classifier__class_weight": [None, "balanced"],
"q__recalib": [None, "bcts"],
"confidence": [None, ["isoft"], ["max_conf", "entropy"]],
"confidence": [
None,
["isoft"],
["max_conf", "entropy"],
["max_conf", "entropy", "isoft"],
],
}
case "sld_rbf":
_scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@ -35,7 +40,12 @@ def _param_grid(method, X_fit: np.ndarray):
"q__classifier__class_weight": [None, "balanced"],
"q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
"q__recalib": [None, "bcts"],
"confidence": [None, ["isoft"], ["max_conf", "entropy"]],
"confidence": [
None,
["isoft"],
["max_conf", "entropy"],
["max_conf", "entropy", "isoft"],
],
}
case "pacc":
return {
@ -48,7 +58,7 @@ def _param_grid(method, X_fit: np.ndarray):
"q__classifier__C": np.logspace(-3, 3, 7),
"q__classifier__class_weight": [None, "balanced"],
"q__bandwidth": np.linspace(0.01, 0.2, 20),
"confidence": [None, ["isoft"]],
"confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
}
case "kde_rbf":
_scale = 1.0 / (X_fit.shape[1] * X_fit.var())
@ -57,7 +67,7 @@ def _param_grid(method, X_fit: np.ndarray):
"q__classifier__class_weight": [None, "balanced"],
"q__classifier__gamma": _scale * np.logspace(-2, 2, 5),
"q__bandwidth": np.linspace(0.01, 0.2, 20),
"confidence": [None, ["isoft"]],
"confidence": [None, ["isoft"], ["max_conf", "entropy", "isoft"]],
}
@ -96,6 +106,15 @@ def evaluation_report(
return report
@dataclass(frozen=True)
class EmptyMethod:
name: str
nocall: bool = True
def __call__(self, c_model, validation, protocol) -> EvaluationReport:
pass
@dataclass(frozen=True)
class EvaluationMethod:
name: str
@ -162,13 +181,16 @@ class EvaluationMethodGridSearch(EvaluationMethod):
verbose=False,
**_search_params,
).fit(v_train)
return evaluation_report(
er = evaluation_report(
estimator=est,
protocol=protocol,
method_name=self.name,
)
er.fit_score = est.best_score()
return er
E = EmptyMethod
M = EvaluationMethod
G = EvaluationMethodGridSearch
@ -229,12 +251,19 @@ __sld_lr_set = [
M("mul_sld_lr_is", __sld_lr(), "mul", conf="isoft", ),
M("m3w_sld_lr_is", __sld_lr(), "mul", conf="isoft", cf=True),
M("mgf_sld_lr_is", __sld_lr(), "mul", conf="isoft", gf=True),
# sld all
M("bin_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
M("bgf_sld_lr_a", __sld_lr(), "bin", conf=["max_conf", "entropy", "isoft"], gf=True),
M("mul_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
M("m3w_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
M("mgf_sld_lr_a", __sld_lr(), "mul", conf=["max_conf", "entropy", "isoft"], gf=True),
# gs sld
G("bin_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr" ),
G("bgf_sld_lr_gs", __sld_lr(), "bin", pg="sld_lr", gf=True),
G("mul_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr" ),
G("m3w_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", cf=True),
G("mgf_sld_lr_gs", __sld_lr(), "mul", pg="sld_lr", gf=True),
E("sld_lr_gs"),
]
__dense_sld_lr_set = [
@ -267,12 +296,18 @@ __dense_sld_lr_set = [
M("d_mul_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", ),
M("d_m3w_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", cf=True),
M("d_mgf_sld_lr_is", __sld_lr(), "mul", d=True, conf="isoft", gf=True),
# sld all
M("d_bin_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_bgf_sld_lr_a", __sld_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
M("d_mul_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_m3w_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
M("d_mgf_sld_lr_a", __sld_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
# gs sld
G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
G("d_bin_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr" ),
G("d_bgf_sld_lr_gs", __sld_lr(), "bin", d=True, pg="sld_lr", gf=True),
G("d_mul_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr" ),
G("d_m3w_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", cf=True),
G("d_mgf_sld_lr_gs", __sld_lr(), "mul", d=True, pg="sld_lr", gf=True),
]
__dense_sld_rbf_set = [
@ -305,6 +340,12 @@ __dense_sld_rbf_set = [
M("d_mul_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", ),
M("d_m3w_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", cf=True),
M("d_mgf_sld_rbf_is", __sld_rbf(), "mul", d=True, conf="isoft", gf=True),
# sld all
M("d_bin_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_bgf_sld_rbf_a", __sld_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
M("d_mul_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_m3w_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
M("d_mgf_sld_rbf_a", __sld_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], gf=True),
# gs sld
G("d_bin_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", ),
G("d_bgf_sld_rbf_gs", __sld_rbf(), "bin", d=True, pg="sld_rbf", search="spider", gf=True),
@ -334,10 +375,15 @@ __kde_lr_set = [
M("bin_kde_lr_is", __kde_lr(), "bin", conf="isoft", ),
M("mul_kde_lr_is", __kde_lr(), "mul", conf="isoft", ),
M("m3w_kde_lr_is", __kde_lr(), "mul", conf="isoft", cf=True),
# kde all
M("bin_kde_lr_a", __kde_lr(), "bin", conf=["max_conf", "entropy", "isoft"], ),
M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
# gs kde
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
E("kde_lr_gs"),
]
__dense_kde_lr_set = [
@ -361,6 +407,10 @@ __dense_kde_lr_set = [
M("d_bin_kde_lr_is", __kde_lr(), "bin", d=True, conf="isoft", ),
M("d_mul_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", ),
M("d_m3w_kde_lr_is", __kde_lr(), "mul", d=True, conf="isoft", cf=True),
# kde all
M("d_bin_kde_lr_a", __kde_lr(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_mul_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_m3w_kde_lr_a", __kde_lr(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
# gs kde
G("d_bin_kde_lr_gs", __kde_lr(), "bin", d=True, pg="kde_lr", search="spider" ),
G("d_mul_kde_lr_gs", __kde_lr(), "mul", d=True, pg="kde_lr", search="spider" ),
@ -388,6 +438,10 @@ __dense_kde_rbf_set = [
M("d_bin_kde_rbf_is", __kde_rbf(), "bin", d=True, conf="isoft", ),
M("d_mul_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", ),
M("d_m3w_kde_rbf_is", __kde_rbf(), "mul", d=True, conf="isoft", cf=True),
# kde all
M("d_bin_kde_rbf_a", __kde_rbf(), "bin", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_mul_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], ),
M("d_m3w_kde_rbf_a", __kde_rbf(), "mul", d=True, conf=["max_conf", "entropy", "isoft"], cf=True),
# gs kde
G("d_bin_kde_rbf_gs", __kde_rbf(), "bin", d=True, pg="kde_rbf", search="spider" ),
G("d_mul_kde_rbf_gs", __kde_rbf(), "mul", d=True, pg="kde_rbf", search="spider" ),

View File

@ -1,7 +1,6 @@
import json
import pickle
from collections import defaultdict
from itertools import chain
from pathlib import Path
from typing import List, Tuple
@ -39,6 +38,7 @@ class EvaluationReport:
self.data: pd.DataFrame | None = None
self.name = name if name is not None else "default"
self.time = 0.0
self.fit_score = None
def append_row(self, basep: np.ndarray | Tuple, **row):
# bp = basep[1]
@ -89,6 +89,7 @@ class CompReport:
train_prev: np.ndarray = None,
valid_prev: np.ndarray = None,
times=None,
fit_scores=None,
g_time=None,
):
if isinstance(datas, pd.DataFrame):
@ -105,6 +106,13 @@ class CompReport:
.sort_index(axis=0, level=0, ascending=False, sort_remaining=False)
)
if fit_scores is None:
self.fit_scores = {
er.name: er.fit_score for er in datas if er.fit_score is not None
}
else:
self.fit_scores = fit_scores
if times is None:
self.times = {er.name: er.time for er in datas}
else:
@ -114,6 +122,51 @@ class CompReport:
self.train_prev = train_prev
self.valid_prev = valid_prev
def postprocess(
self,
f_data: pd.DataFrame,
_data: pd.DataFrame,
metric=None,
estimators=None,
) -> pd.DataFrame:
_mapping = {
"sld_lr_gs": [
"bin_sld_lr_gs",
"mul_sld_lr_gs",
"m3w_sld_lr_gs",
],
"kde_lr_gs": [
"bin_kde_lr_gs",
"mul_kde_lr_gs",
"m3w_kde_lr_gs",
],
}
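# for each grid-search family alias above, fill the alias column with the values of the
# member that obtained the best (lowest) fit score, provided all members appear in the data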
for name, methods in _mapping.items():
if estimators is not None and name not in estimators:
continue
if len(np.where(np.in1d(methods, self._data.columns.unique(1)))[0]) != len(
methods
):
continue
_metric = _get_metric(metric)
m_data = _data.loc[:, (_metric, methods)]
_fit_scores = [(k, v) for (k, v) in self.fit_scores.items() if k in methods]
_best_method = [k for k, v in _fit_scores][
np.argmin([v for k, v in _fit_scores])
]
_metric = (
[_metric]
if isinstance(_metric, str)
else m_data.columns.unique(0)
)
for _m in _metric:
f_data.loc[:, (_m, name)] = m_data.loc[:, (_m, _best_method)]
return f_data
@property
def prevs(self) -> np.ndarray:
return self.data().index.unique(0)
@ -149,6 +202,7 @@ class CompReport:
train_prev=self.train_prev,
valid_prev=self.valid_prev,
times=self.times | other.times,
fit_scores=self.fit_scores | other.fit_scores,
g_time=self.times["tot"] + other.times["tot"],
)
@ -159,7 +213,10 @@ class CompReport:
_estimators = _get_estimators(
estimators, self._data.loc[:, (_metric, slice(None))].columns.unique(1)
)
f_data: pd.DataFrame = self._data.copy().loc[:, (_metric, _estimators)]
_data: pd.DataFrame = self._data.copy()
f_data: pd.DataFrame = _data.loc[:, (_metric, _estimators)]
f_data = self.postprocess(f_data, _data, metric=metric, estimators=estimators)
if len(f_data.columns.unique(0)) == 1:
f_data = f_data.droplevel(level=0, axis=1)
@ -187,7 +244,11 @@ class CompReport:
_estimators = _get_estimators(
estimators, shift_data.loc[:, (_metric, slice(None))].columns.unique(1)
)
s_data: pd.DataFrame = shift_data
shift_data: pd.DataFrame = shift_data.loc[:, (_metric, _estimators)]
shift_data = self.postprocess(
shift_data, s_data, metric=metric, estimators=estimators
)
if len(shift_data.columns.unique(0)) == 1:
shift_data = shift_data.droplevel(level=0, axis=1)
@ -354,17 +415,27 @@ class CompReport:
return res
def _cr_train_prev(cr: CompReport):
return tuple(np.around(cr.train_prev, decimals=2))
def _cr_data(cr: CompReport, metric=None, estimators=None):
return cr.data(metric, estimators)
class DatasetReport:
_default_dr_modes = [
"delta_train",
"stdev_train",
"train_table",
"train_std_table",
"shift",
"shift_table",
"delta_test",
"stdev_test",
"test_table",
"stats_table",
"fit_scores",
]
_default_cr_modes = CompReport._default_modes
@ -380,15 +451,62 @@ class DatasetReport:
return DatasetReport(self.name, _crs)
def fit_scores(self, metric: str = None, estimators: List[str] = None):
def _get_sort_idx(arr):
return np.array([np.searchsorted(np.sort(a), a) + 1 for a in arr])
def _get_best_idx(arr):
return np.argmin(arr, axis=1)
def _fdata_idx(idx) -> np.ndarray:
return _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()
_crs_train = [_cr_train_prev(cr) for cr in self.crs]
for cr in self.crs:
if not hasattr(cr, "fit_scores"):
return None
_crs_fit_scores = [cr.fit_scores for cr in self.crs]
_fit_scores = pd.DataFrame(_crs_fit_scores, index=_crs_train)
_fit_scores = _fit_scores.sort_index(axis=0, ascending=False)
_estimators = _get_estimators(estimators, _fit_scores.columns)
if _estimators.shape[0] == 0:
return None
_fdata = self.data(metric=metric, estimators=_estimators)
# ensure that columns in _fit_scores have the same ordering of _fdata
_fit_scores = _fit_scores.loc[:, _fdata.columns]
_best_fit_estimators = _get_best_idx(_fit_scores.to_numpy())
# scores = np.array(
# [
# _get_sort_idx(
# _fdata.loc[(idx, slice(None), slice(None)), :].to_numpy()
# )[:, cl].mean()
# for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
# ]
# )
# for idx, cl in zip(_fit_scores.index, _best_fit_estimators):
# print(_fdata_idx(idx)[:, cl])
# print(_fdata_idx(idx).min(axis=1), end="\n\n")
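# score per training prevalence: mean absolute gap between the estimator selected by
# fit score and the best-performing estimator on each sample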
scores = np.array(
[
np.abs(_fdata_idx(idx)[:, cl] - _fdata_idx(idx).min(axis=1)).mean()
for idx, cl in zip(_fit_scores.index, _best_fit_estimators)
]
)
return scores
def data(self, metric: str = None, estimators: List[str] = None) -> pd.DataFrame:
def _cr_train_prev(cr: CompReport):
return tuple(np.around(cr.train_prev, decimals=2))
def _cr_data(cr: CompReport):
return cr.data(metric, estimators)
_crs_sorted = sorted(
[(_cr_train_prev(cr), _cr_data(cr)) for cr in self.crs],
[(_cr_train_prev(cr), _cr_data(cr, metric, estimators)) for cr in self.crs],
key=lambda cr: len(cr[1].columns),
reverse=True,
)
@ -460,6 +578,15 @@ class DatasetReport:
avg_p.loc["mean", :] = f_data.mean()
return avg_p
def train_std_table(self, metric: str = None, estimators: List[str] = None):
f_data = self.data(metric=metric, estimators=estimators)
avg_p = f_data.groupby(level=1, sort=False).mean()
avg_p.loc["mean", :] = f_data.mean()
avg_s = f_data.groupby(level=1, sort=False).std()
avg_s.loc["mean", :] = f_data.std()
avg_r = pd.concat([avg_p, avg_s], axis=1, keys=["avg", "std"])
return avg_r
def test_table(
self, metric: str = None, estimators: List[str] = None
) -> pd.DataFrame:
@ -591,6 +718,20 @@ class DatasetReport:
base_path=base_path,
backend=backend,
)
elif mode == "fit_scores":
_fit_scores = self.fit_scores(metric, estimators) if data is None else data
if _fit_scores is None:
return None
train_prevs = self.data(metric, estimators).index.unique(0)
return plot.plot_fit_scores(
train_prevs=train_prevs,
scores=_fit_scores,
metric=metric,
name=conf,
save_fig=save_fig,
base_path=base_path,
backend=backend,
)
def to_md(
self,

View File

@ -42,7 +42,7 @@ class BaseAccuracyEstimator(BaseQuantifier):
pred_proba = self.classifier.predict_proba(coll.X)
return ExtendedCollection.from_lc(
coll, pred_proba=pred_proba, extpol=self.extpol
coll, pred_proba=pred_proba, ext=pred_proba, extpol=self.extpol
)
def _extend_instances(self, instances: np.ndarray | sp.csr_matrix):

View File

@ -63,6 +63,13 @@ class Threshold(ConfidenceMetric):
_exp = scores - self.threshold
return _exp
# def conf(self, X, probas):
# scores = self.get_scores(probas)
# _exp = np.where(
# scores >= self.threshold, np.ones(scores.shape), np.zeros(scores.shape)
# )
# return _exp[:, np.newaxis]
@metric("linreg")
class LinReg(ConfidenceMetric):

View File

@ -242,6 +242,11 @@ class GridSearchAE(BaseAccuracyEstimator):
return self.best_model_
raise ValueError("best_model called before fit")
def best_score(self):
if hasattr(self, "best_score_"):
return self.best_score_
raise ValueError("best_score called before fit")
class RandomizedSearchAE(GridSearchAE):
ERR_THRESHOLD = 1e-4
@ -473,3 +478,4 @@ class SpiderSearchAE(GridSearchAE):
score += 1
return score

View File

@ -1 +1,7 @@
from quacc.plot.plot import get_backend, plot_delta, plot_diagonal, plot_shift
from quacc.plot.plot import (
get_backend,
plot_delta,
plot_diagonal,
plot_shift,
plot_fit_scores,
)

View File

@ -52,3 +52,16 @@ class BasePlot:
legend=True,
):
...
@classmethod
def plot_fit_scores(
cls,
train_prevs,
scores,
*,
pos_class=1,
title="default",
x_label="prev.",
y_label="position",
legend=True,
):
...

View File

@ -142,3 +142,37 @@ def plot_shift(
return fig, output_path
return fig
def plot_fit_scores(
train_prevs,
scores,
*,
pos_class=1,
metric="acc",
name="default",
legend=True,
save_fig=False,
base_path=None,
backend=None,
):
backend = __backend if backend is None else backend
title = f"fit_scores_{name}_avg_{metric}"
x_label = "train prev."
y_label = "position"
fig = backend.plot_fit_scores(
train_prevs,
scores,
pos_class=pos_class,
title=title,
x_label=x_label,
y_label=y_label,
legend=legend,
)
if save_fig:
output_path = backend.save_fig(fig, base_path, title)
return fig, output_path
return fig

View File

@ -8,10 +8,38 @@ import plotly.graph_objects as go
from quacc.plot.base import BasePlot
class PlotCfg:
def __init__(self, mode, lwidth, font=None, legend=None, template="seaborn"):
self.mode = mode
self.lwidth = lwidth
self.legend = {} if legend is None else legend
self.font = {} if font is None else font
self.template = template
web_cfg = PlotCfg("lines+markers", 2)
png_cfg = PlotCfg(
"lines",
5,
legend=dict(
orientation="h",
yanchor="bottom",
xanchor="right",
y=1.02,
x=1,
font=dict(size=24),
),
font=dict(size=24),
# template="ggplot2",
)
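# active plotting preset (png_cfg is currently used everywhere; presumably web_cfg is the
# thinner lines+markers style for the interactive dashboard)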
_cfg = png_cfg
class PlotlyPlot(BasePlot):
__themes = defaultdict(
lambda: {
"template": "seaborn",
"template": _cfg.template,
}
)
__themes = __themes | {
@ -35,7 +63,7 @@ class PlotlyPlot(BasePlot):
case v if v > 10:
__colors = plotly.colors.qualitative.Light24
case _:
__colors = plotly.colors.qualitative.Plotly
__colors = plotly.colors.qualitative.G10
def __generator(cs):
while True:
@ -50,9 +78,8 @@ class PlotlyPlot(BasePlot):
xaxis_title=x_label,
yaxis_title=y_label,
template=self.theme["template"],
font=dict(
size=18,
),
font=_cfg.font,
legend=_cfg.legend,
)
def save_fig(self, fig, base_path, title) -> Path:
@ -82,9 +109,9 @@ class PlotlyPlot(BasePlot):
go.Scatter(
x=x,
y=delta,
mode="lines+markers",
mode=_cfg.mode,
name=name,
line=dict(color=self.hex_to_rgb(color)),
line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
hovertemplate="prev.: %{x}<br>error: %{y:,.4f}",
)
]
@ -193,9 +220,9 @@ class PlotlyPlot(BasePlot):
x=x,
y=delta,
customdata=np.stack((counts[col_idx],), axis=-1),
mode="lines+markers",
mode=_cfg.mode,
name=name,
line=dict(color=self.hex_to_rgb(color)),
line=dict(color=self.hex_to_rgb(color), width=_cfg.lwidth),
hovertemplate="shift: %{x}<br>error: %{y}"
+ "<br>count: %{customdata[0]}"
if counts is not None
@ -205,3 +232,29 @@ class PlotlyPlot(BasePlot):
self.update_layout(fig, title, x_label, y_label)
return fig
def plot_fit_scores(
self,
train_prevs,
scores,
*,
pos_class=1,
title="default",
x_label="prev.",
y_label="position",
legend=True,
) -> go.Figure:
fig = go.Figure()
# x = train_prevs
x = [str(tuple(bp)) for bp in train_prevs]
fig.add_trace(
go.Scatter(
x=x,
y=scores,
mode="lines+markers",
showlegend=False,
),
)
self.update_layout(fig, title, x_label, y_label)
return fig

6775
remote.log

File diff suppressed because it is too large

116
requirements.txt Normal file
View File

@ -0,0 +1,116 @@
abstention==0.1.3.1 ; python_version >= "3.10" and python_version < "4.0"
ansi2html==1.9.1 ; python_version >= "3.10" and python_version < "4.0"
appnope==0.1.3 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Darwin"
asttokens==2.4.1 ; python_version >= "3.10" and python_version < "4.0"
bcrypt==4.1.2 ; python_version >= "3.10" and python_version < "4.0"
bleach==6.1.0 ; python_version >= "3.10" and python_version < "4.0"
blinker==1.7.0 ; python_version >= "3.10" and python_version < "4.0"
bokeh==3.3.4 ; python_version >= "3.10" and python_version < "4.0"
certifi==2023.11.17 ; python_version >= "3.10" and python_version < "4.0"
cffi==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0"
click==8.1.7 ; python_version >= "3.10" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and sys_platform == "win32" or python_version >= "3.10" and python_version < "4" and platform_system == "Windows"
comm==0.2.1 ; python_version >= "3.10" and python_version < "4.0"
contourpy==1.2.0 ; python_version >= "3.10" and python_version < "4"
coverage[toml]==7.4.1 ; python_version >= "3.10" and python_version < "4.0"
cryptography==42.0.1 ; python_version >= "3.10" and python_version < "4.0"
cycler==0.12.1 ; python_version >= "3.10" and python_version < "4"
dash-bootstrap-components==1.5.0 ; python_version >= "3.10" and python_version < "4"
dash-core-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
dash-html-components==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
dash-table==5.0.0 ; python_version >= "3.10" and python_version < "4.0"
dash==2.14.2 ; python_version >= "3.10" and python_version < "4.0"
debugpy==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"
exceptiongroup==1.2.0 ; python_version >= "3.10" and python_version < "3.11"
executing==2.0.1 ; python_version >= "3.10" and python_version < "4.0"
flask==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
fonttools==4.47.2 ; python_version >= "3.10" and python_version < "4"
gunicorn==21.2.0 ; python_version >= "3.10" and python_version < "4.0"
idna==3.6 ; python_version >= "3.10" and python_version < "4.0"
importlib-metadata==7.0.1 ; python_version >= "3.10" and python_version < "4.0"
iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0"
ipykernel==6.29.0 ; python_version >= "3.10" and python_version < "4.0"
ipympl==0.9.3 ; python_version >= "3.10" and python_version < "4.0"
ipython-genutils==0.2.0 ; python_version >= "3.10" and python_version < "4.0"
ipython==8.20.0 ; python_version >= "3.10" and python_version < "4.0"
ipywidgets-bokeh==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
ipywidgets==8.1.1 ; python_version >= "3.10" and python_version < "4.0"
itsdangerous==2.1.2 ; python_version >= "3.10" and python_version < "4.0"
jedi==0.19.1 ; python_version >= "3.10" and python_version < "4.0"
jinja2==3.1.3 ; python_version >= "3.10" and python_version < "4.0"
joblib==1.3.2 ; python_version >= "3.10" and python_version < "4"
jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "4.0"
jupyter-core==5.7.1 ; python_version >= "3.10" and python_version < "4.0"
jupyterlab-widgets==3.0.9 ; python_version >= "3.10" and python_version < "4.0"
kiwisolver==1.4.5 ; python_version >= "3.10" and python_version < "4"
linkify-it-py==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
logging==0.4.9.6 ; python_version >= "3.10" and python_version < "4.0"
markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
markdown==3.5.2 ; python_version >= "3.10" and python_version < "4.0"
markupsafe==2.1.4 ; python_version >= "3.10" and python_version < "4.0"
matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "4.0"
matplotlib==3.8.2 ; python_version >= "3.10" and python_version < "4"
mdit-py-plugins==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0"
nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "4.0"
numpy==1.26.3 ; python_version >= "3.10" and python_version < "4.0"
packaging==23.2 ; python_version >= "3.10" and python_version < "4.0"
pandas-stubs==2.1.4.231227 ; python_version >= "3.10" and python_version < "4.0"
pandas==2.2.0 ; python_version >= "3.10" and python_version < "4.0"
panel==1.3.8 ; python_version >= "3.10" and python_version < "4.0"
param==2.0.2 ; python_version >= "3.10" and python_version < "4.0"
paramiko==3.4.0 ; python_version >= "3.10" and python_version < "4.0"
parso==0.8.3 ; python_version >= "3.10" and python_version < "4.0"
pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
pillow==10.2.0 ; python_version >= "3.10" and python_version < "4.0"
platformdirs==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
plotly==5.18.0 ; python_version >= "3.10" and python_version < "4.0"
pluggy==1.4.0 ; python_version >= "3.10" and python_version < "4.0"
prompt-toolkit==3.0.43 ; python_version >= "3.10" and python_version < "4.0"
psutil==5.9.8 ; python_version >= "3.10" and python_version < "4.0"
ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "win32"
pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "4.0"
pyarrow==15.0.0 ; python_version >= "3.10" and python_version < "4.0"
pycparser==2.21 ; python_version >= "3.10" and python_version < "4.0"
pygments==2.17.2 ; python_version >= "3.10" and python_version < "4.0"
pylance==0.5.10 ; python_version >= "3.10" and python_version < "4.0"
pynacl==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
pyparsing==3.1.1 ; python_version >= "3.10" and python_version < "4"
pytest-cov==4.1.0 ; python_version >= "3.10" and python_version < "4.0"
pytest-mock==3.12.0 ; python_version >= "3.10" and python_version < "4.0"
pytest==8.0.0 ; python_version >= "3.10" and python_version < "4.0"
python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "4.0"
pytz==2023.4 ; python_version >= "3.10" and python_version < "4.0"
pyviz-comms==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "4.0"
pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0"
pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "4.0"
quapy==0.1.7 ; python_version >= "3.10" and python_version < "4"
requests==2.31.0 ; python_version >= "3.10" and python_version < "4.0"
retrying==1.3.4 ; python_version >= "3.10" and python_version < "4.0"
scikit-learn==1.4.0 ; python_version >= "3.10" and python_version < "4"
scipy==1.12.0 ; python_version >= "3.10" and python_version < "4.0"
setuptools==69.0.3 ; python_version >= "3.10" and python_version < "4.0"
six==1.16.0 ; python_version >= "3.10" and python_version < "4.0"
stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0"
tabulate==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
tenacity==8.2.3 ; python_version >= "3.10" and python_version < "4.0"
threadpoolctl==3.2.0 ; python_version >= "3.10" and python_version < "4"
tomli==2.0.1 ; python_version >= "3.10" and python_full_version <= "3.11.0a6"
tornado==6.4 ; python_version >= "3.10" and python_version < "4.0"
tqdm==4.66.1 ; python_version >= "3.10" and python_version < "4"
traitlets==5.14.1 ; python_version >= "3.10" and python_version < "4.0"
types-pytz==2023.4.0.20240130 ; python_version >= "3.10" and python_version < "4.0"
typing-extensions==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
tzdata==2023.4 ; python_version >= "3.10" and python_version < "4.0"
uc-micro-py==1.0.2 ; python_version >= "3.10" and python_version < "4.0"
urllib3==2.1.0 ; python_version >= "3.10" and python_version < "4.0"
wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4.0"
webencodings==0.5.1 ; python_version >= "3.10" and python_version < "4.0"
werkzeug==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
widgetsnbextension==4.0.9 ; python_version >= "3.10" and python_version < "4.0"
xlrd==2.0.1 ; python_version >= "3.10" and python_version < "4"
xyzservices==2023.10.1 ; python_version >= "3.10" and python_version < "4.0"
zipp==3.17.0 ; python_version >= "3.10" and python_version < "4.0"

9
test_postprocess.py Normal file
View File

@ -0,0 +1,9 @@
from quacc.evaluation.report import DatasetReport
dr = DatasetReport.unpickle("output/main/imdb/imdb.pickle")
_estimators = ["sld_lr_gs", "bin_sld_lr_gs", "mul_sld_lr_gs", "m3w_sld_lr_gs"]
_data = dr.data(metric="acc", estimators=_estimators)
for idx, cr in zip(_data.index.unique(0), dr.crs[::-1]):
    print(cr.train_prev)
    print({k: v for k, v in cr.fit_scores.items() if k in _estimators})
    print(_data.loc[(idx, slice(None), slice(None)), :])