cleaning
This commit is contained in:
parent
af0f1c7085
commit
4c6a5e69f3
|
@ -7,6 +7,7 @@ quavenv/*
|
||||||
*__pycache__*
|
*__pycache__*
|
||||||
htmlcov/*
|
htmlcov/*
|
||||||
|
|
||||||
|
accuracy_prediction*.py
|
||||||
test*.py
|
test*.py
|
||||||
selected_gs.py
|
selected_gs.py
|
||||||
|
|
||||||
|
|
|
@ -1,90 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
from sklearn.linear_model import LogisticRegression
|
|
||||||
from sklearn.metrics import f1_score
|
|
||||||
|
|
||||||
import quapy as qp
|
|
||||||
from method.kdey import KDEyML, KDEyCS, KDEyHD
|
|
||||||
from quapy.protocol import APP
|
|
||||||
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy
|
|
||||||
|
|
||||||
# Experiment: estimate the accuracy (or F1) of a classifier on test samples
# from (i) the misclassification rates measured on a validation split and
# (ii) the class prevalence of the sample as estimated by a quantifier (EMQ).
datasets = qp.datasets.UCI_DATASETS

# Measure to estimate: 'acc' selects accuracy, any other value selects F1.
# target = 'f1'
target = 'acc'

# Absolute estimation errors, one per test sample, pooled over all datasets.
errors = []

for dataset_name in datasets:
    # NOTE(review): these datasets were excluded by the original author; the
    # reason is not recorded in this file.
    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
        continue
    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    train, val = train.split_stratified(random_state=0)

    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # The quantifier and the validation misclassification matrix do not depend
    # on the test sample, so they are built once per dataset rather than once
    # per sample.
    q = EMQ(cls)
    q.fit(train, fit_classifier=False)
    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)

    protocol = APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')
    for sample in protocol():
        print('='*80)
        y_hat = cls.predict(sample.instances)
        y = sample.labels
        if target == 'acc':
            acc = (y_hat == y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        # M_true is computed from the sample's true labels; printed for
        # inspection only, it does not enter the estimate.
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
        p_hat = q.quantify(sample.instances)

        # Scale each column of M_hat by the estimated prevalence of the
        # corresponding class to obtain an estimated contingency table.
        cont_table_hat = p_hat * M_hat

        tp = cont_table_hat[1, 1]
        tn = cont_table_hat[0, 0]
        fn = cont_table_hat[0, 1]
        fp = cont_table_hat[1, 0]

        if target == 'acc':
            acc_hat = (tp + tn)
        else:
            den = (2*tp + fn + fp)
            if den > 0:
                acc_hat = 2*tp / den
            else:
                acc_hat = 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,269 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import scipy.special
|
|
||||||
from sklearn.linear_model import LogisticRegression
|
|
||||||
from sklearn.metrics import f1_score
|
|
||||||
|
|
||||||
import quapy as qp
|
|
||||||
from quapy.protocol import APP
|
|
||||||
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
|
|
||||||
from sklearn import clone
|
|
||||||
import quapy.functional as F
|
|
||||||
|
|
||||||
# Benchmarks to run. Swap in the commented line to use the UCI suite instead.
# datasets = qp.datasets.UCI_DATASETS
datasets = ['imdb']

# Measure to estimate: 'acc' selects accuracy, any other value selects F1.
# target = 'f1'
target = 'acc'

# Absolute estimation errors collected across all test samples.
errors = list()
|
|
||||||
|
|
||||||
def method_1(cls, train, val, sample, y=None, y_hat=None):
    """
    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
    by relying on the PPS assumptions.

    :param cls: an already fitted classifier; it is used for prediction and wrapped (not refitted) by EMQ
    :param train: collection on which the EMQ quantifier is fitted (with fit_classifier=False)
    :param val: held-out collection on which the misclassification rates are estimated
    :param sample: the test sample whose contingency table is to be estimated
    :param y: (optional) true labels of `sample`; only used to print the true misclassification matrix
    :param y_hat: (optional) predicted labels for `sample`; only used for the same diagnostic
    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    q = EMQ(cls)
    q.fit(train, fit_classifier=False)

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
    p_hat = q.quantify(sample.instances)

    # Scale each column of M_hat by the estimated prevalence of that class.
    # NOTE(review): the tp/fn/fp/tn indexing below assumes rows index the
    # predicted class and columns the true class -- confirm against quapy.
    cont_table_hat = p_hat * M_hat

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    # The true matrix is a diagnostic that needs the sample's labels and
    # predictions; skip it when the caller did not supply them instead of
    # failing on the None defaults.
    if y is not None and y_hat is not None:
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
        print('M-true:\n', M_true)
    print('M-hat:\n', M_hat)
    print('cont_table:\n', cont_table_hat)
    print('cont_table Sum :\n', cont_table_hat.sum())

    tp = cont_table_hat[1, 1]
    tn = cont_table_hat[0, 0]
    fn = cont_table_hat[0, 1]
    fp = cont_table_hat[1, 0]

    return tn, fn, fp, tp
|
|
||||||
|
|
||||||
|
|
||||||
def method_2(cls, train, val, sample, y=None, y_hat=None):
    """
    Assume P and Q are the training and test distributions.
    Solves the following system of linear equations:
    tp + fp = CC (the classify & count estimate, observed)
    fn + tp = Q(Y=1) (this is not observed but is estimated via quantification)
    tp + fp + fn + tn = 1 (trivial)

    There are 4 unknowns and 3 equations. The fourth required one is established
    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
    In particular, we consider that the tpr in P (estimated via validation, hereafter tpr) and
    in Q (unknown, hereafter tpr_Q) should be the same. This means:
    tpr = tpr_Q = tp / (tp + fn)
    after some manipulation:
    tp (tpr-1) + fn (tpr) = 0 <-- our last equation

    Note that the last equation relies on the estimate tpr. It is likely that, the more
    positives we have, the more reliable this estimate is. This suggests that, in cases
    in which we have more negatives in the validation set than positives, it might be
    convenient to resort to the true negative rate (tnr) instead. This gives rise to
    the alternative fourth equation:
    tn (tnr-1) + fp (tnr) = 0

    :param cls: an already fitted classifier; wrapped (not refitted) by the ACC and CC quantifiers
    :param train: collection on which the ACC and CC quantifiers are fitted
    :param val: held-out collection; passed to ACC as val_split and used to estimate tpr/tnr
    :param sample: the test sample whose contingency table is to be estimated
    :param y: (optional) true labels of `sample`; only used to print the true contingency table
    :param y_hat: (optional) predicted labels for `sample`; only used for the same diagnostic
    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # Q(Y=1): positive prevalence of the sample, estimated via ACC
    q = ACC(cls)
    q.fit(train, val_split=val, fit_classifier=False)
    p_hat = q.quantify(sample.instances)
    pos_prev = p_hat[1]

    # fraction of the sample predicted as positive (classify & count)
    cc = CC(cls)
    cc.fit(train, fit_classifier=False)
    cc_prev = cc.quantify(sample.instances)[1]

    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)

    # The unknowns are ordered as (tn, fn, fp, tp).
    if val.prevalence()[1] > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        tpr_hat = M_hat[1, 1]
        last_equation = [0, tpr_hat, 0, tpr_hat - 1]
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        tnr_hat = M_hat[0, 0]
        last_equation = [tnr_hat - 1, 0, tnr_hat, 0]

    A = np.asarray([
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [1, 1, 1, 1],
        last_equation,
    ])
    b = np.asarray([cc_prev, pos_prev, 1, 0])

    tn, fn, fp, tp = np.linalg.solve(A, b)

    cont_table_estim = np.asarray([
        [tn, fn],
        [fp, tp]
    ])

    print('true_prev: ', sample.prevalence())
    print('estim_prev: ', p_hat)
    # The true table is a diagnostic that needs the sample's labels and
    # predictions; skip it when the caller did not supply them instead of
    # failing on the None defaults.
    if y is not None and y_hat is not None:
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
        cont_table_true = sample.prevalence() * M_true
        print('true_cont_table:\n', cont_table_true)
    print('estim_cont_table:\n', cont_table_estim)

    return tn, fn, fp, tp
|
|
||||||
|
|
||||||
|
|
||||||
def method_3(cls, train, val, sample, y=None, y_hat=None):
    """
    This is just method 2 but without involving any quapy's quantifier.

    The positive prevalence of the sample is obtained by adjusting the
    classify & count estimate with the tpr and fpr measured in validation:
    (cc - fpr) / (tpr - fpr), clipped to [0,1].

    :param cls: an already fitted classifier; only its `predict` method is called
    :param train: unused; kept so that all methods share the same signature
    :param val: held-out collection on which tpr, fpr, and tnr are estimated
    :param sample: the test sample whose contingency table is to be estimated
    :param y: unused; kept so that all methods share the same signature
    :param y_hat: unused; kept so that all methods share the same signature
    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
    """

    classes = val.classes_
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)
    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
    y_hat_test = cls.predict(sample.instances)
    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
    tpr_hat = M_hat[1, 1]
    fpr_hat = M_hat[1, 0]
    tnr_hat = M_hat[0, 0]

    rate_gap = tpr_hat - fpr_hat
    if rate_gap == 0:
        # The adjustment is undefined when tpr and fpr coincide; dividing
        # would yield nan/inf and poison the linear system below, so fall
        # back to the unadjusted classify & count estimate.
        pos_prev_test_hat = pos_prev_cc
    else:
        pos_prev_test_hat = (pos_prev_cc - fpr_hat) / rate_gap
    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
    pos_prev_val = val.prevalence()[1]

    # The unknowns are ordered as (tn, fn, fp, tp).
    if pos_prev_val > 0.5:
        # in this case, the tpr might be a more reliable estimate than tnr
        last_equation = [0, tpr_hat, 0, tpr_hat - 1]
    else:
        # in this case, the tnr might be a more reliable estimate than tpr
        last_equation = [tnr_hat - 1, 0, tnr_hat, 0]

    A = np.asarray([
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [1, 1, 1, 1],
        last_equation,
    ])
    b = np.asarray([pos_prev_cc, pos_prev_test_hat, 1, 0])

    tn, fn, fp, tp = np.linalg.solve(A, b)

    return tn, fn, fp, tp
|
|
||||||
|
|
||||||
|
|
||||||
def cls_eval_from_counters(tn, fn, fp, tp):
    """
    Computes the evaluation measure selected by the module-level `target`
    from the four cells of a contingency table.

    :param tn: true negatives
    :param fn: false negatives
    :param fp: false positives
    :param tp: true positives
    :return: tp + tn if `target` is 'acc'; otherwise 2*tp / (2*tp + fn + fp),
        or 0 when that denominator is not positive
    """
    if target != 'acc':
        f1_den = (2 * tp + fn + fp)
        return 2 * tp / f1_den if f1_den > 0 else 0
    return (tp + tn)
|
|
||||||
|
|
||||||
|
|
||||||
def cls_eval_from_labels(y, y_hat):
    """
    Computes the evaluation measure selected by the module-level `target`
    from true and predicted labels.

    :param y: true labels
    :param y_hat: predicted labels
    :return: the fraction of matching labels if `target` is 'acc'; otherwise
        the value of f1_score(y, y_hat, zero_division=0)
    """
    if target != 'acc':
        return f1_score(y, y_hat, zero_division=0)
    return (y_hat == y).mean()
|
|
||||||
|
|
||||||
|
|
||||||
# Main experiment loop: for every dataset, draw training sets at varying
# prevalence, fit a classifier on half of each, and compare the true value of
# the target measure on test samples against the value estimated by method_3.
for dataset_name in datasets:

    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test

    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
    for train in train_prot():
        # np.prod replaces the deprecated alias np.product, which is no
        # longer available in NumPy 2.0.
        if np.prod(train.prevalence()) == 0:
            # skip experiments with no positives or no negatives in training
            continue

        cls = LogisticRegression(class_weight='balanced')

        # half of the drawn training set is held out as validation
        train, val = train.split_stratified(train_prop=0.5, random_state=0)

        print(f'dataset name = {dataset_name}')
        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')

        cls.fit(*train.Xy)

        test_prot = APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')
        for sample in test_prot():
            print('='*80)
            y_hat = cls.predict(sample.instances)
            y = sample.labels
            acc_true = cls_eval_from_labels(y, y_hat)

            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)

            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)

            error = abs(acc_true - acc_hat)
            errors.append(error)

            print(f'classifier accuracy={acc_true:.3f}')
            print(f'estimated accuracy={acc_hat:.3f}')
            print(f'estimation error={error:.4f}')

print('process end')
print('='*80)
print(f'mean error = {np.mean(errors)}')
print(f'std error = {np.std(errors)}')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue