91 lines
2.4 KiB
Python
91 lines
2.4 KiB
Python
import numpy as np
|
|
from sklearn.linear_model import LogisticRegression
|
|
from sklearn.metrics import f1_score
|
|
|
|
import quapy as qp
|
|
from method.kdey import KDEyML, KDEyCS, KDEyHD
|
|
from quapy.protocol import APP
|
|
from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy
|
|
|
|
# Names of the datasets to iterate over, as exposed by QuaPy.
datasets = qp.datasets.UCI_DATASETS

# Measure whose value is estimated on every test sample:
# 'acc' selects accuracy; any other value (e.g. 'f1') selects F1.
# target = 'f1'
target = 'acc'

# Datasets left out of this experiment (a set gives O(1) membership tests and
# is built once rather than on every loop iteration).
skipped_datasets = {'balance.2', 'acute.a', 'acute.b', 'iris.1'}

# |true score - estimated score| for every evaluated sample, pooled over all
# datasets; summarised once the loop has finished.
errors = []

for dataset_name in datasets:
    if dataset_name in skipped_datasets:
        continue

    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test

    # The training size is reported BEFORE the validation split below.
    print(f'dataset name = {dataset_name}')
    print(f'#train = {len(train)}')
    print(f'#test = {len(test)}')

    cls = LogisticRegression()

    # Hold out part of the training data: the classifier is fit on the
    # remaining part and its misclassification rates are measured on `val`.
    train, val = train.split_stratified(random_state=0)

    cls.fit(*train.Xy)
    y_val = val.labels
    y_hat_val = cls.predict(val.instances)

    # Everything below depends only on the dataset, not on the test sample,
    # so it is computed once here instead of once per sample.
    # The quantifier wraps the already-trained classifier (no re-fitting).
    q = EMQ(cls)
    q.fit(train, fit_classifier=False)

    # M_hat[i, j] estimates P(predicted = class i | true = class j), measured
    # on the validation split (see QuaPy's ACC.getPteCondEstim).
    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)

    protocol = APP(
        test,
        n_prevalences=11,
        repeats=1,
        sample_size=100,
        return_type='labelled_collection',
    )
    for sample in protocol():
        print('='*80)

        # Ground truth: the score the classifier really attains on this sample.
        y_hat = cls.predict(sample.instances)
        y = sample.labels
        if target == 'acc':
            acc = (y_hat == y).mean()
        else:
            acc = f1_score(y, y_hat, zero_division=0)

        # Same matrix as M_hat but computed with the sample's true labels;
        # only printed, for visual comparison with M_hat.
        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)

        # Estimated class prevalences of the sample (labels are not used).
        p_hat = q.quantify(sample.instances)

        # Broadcasting scales column j of M_hat by p_hat[j], giving the
        # estimated joint table: cont_table_hat[i, j] ~ P(predicted=i, true=j).
        cont_table_hat = p_hat * M_hat

        # The 2x2 indexing assumes a binary problem with class 1 as positive.
        tp = cont_table_hat[1, 1]
        tn = cont_table_hat[0, 0]
        fn = cont_table_hat[0, 1]  # predicted 0, true 1
        fp = cont_table_hat[1, 0]  # predicted 1, true 0

        if target == 'acc':
            acc_hat = (tp + tn)
        else:
            den = (2*tp + fn + fp)
            # F1 is reported as 0 when it is undefined (no positives at all),
            # mirroring zero_division=0 in the true-score computation.
            acc_hat = 2*tp / den if den > 0 else 0

        error = abs(acc - acc_hat)
        errors.append(error)

        print('true_prev: ', sample.prevalence())
        print('estim_prev: ', p_hat)
        print('M-true:\n', M_true)
        print('M-hat:\n', M_hat)
        print('cont_table:\n', cont_table_hat)
        print(f'classifier accuracy={acc:.3f}')
        print(f'estimated accuracy={acc_hat:.3f}')
        print(f'estimation error={error:.4f}')
|
|
|
|
# Final report, emitted once after all datasets and samples are processed:
# a banner followed by the mean and standard deviation of the pooled
# absolute estimation errors collected in `errors`.
for banner_line in ('process end', '='*80):
    print(banner_line)

for stat_name, stat_fn in (('mean', np.mean), ('std', np.std)):
    print(f'{stat_name} error = {stat_fn(errors)}')
|
|
|
|
|
|
|
|
|
|
|
|
|