diff --git a/.gitignore b/.gitignore
index 0b97a44..fb056a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ quavenv/*
 *__pycache__*
 htmlcov/*
+accuracy_prediction*.py
 test*.py
 selected_gs.py
diff --git a/accuracy_prediction_via_quantification.py b/accuracy_prediction_via_quantification.py
deleted file mode 100644
index 032b709..0000000
--- a/accuracy_prediction_via_quantification.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import f1_score
-
-import quapy as qp
-from method.kdey import KDEyML, KDEyCS, KDEyHD
-from quapy.protocol import APP
-from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy
-
-datasets = qp.datasets.UCI_DATASETS
-
-# target = 'f1'
-target = 'acc'
-
-errors = []
-
-# dataset_name = datasets[-2]
-for dataset_name in datasets:
-    if dataset_name in ['balance.2', 'acute.a', 'acute.b', 'iris.1']:
-        continue
-    train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test
-
-    print(f'dataset name = {dataset_name}')
-    print(f'#train = {len(train)}')
-    print(f'#test = {len(test)}')
-
-    cls = LogisticRegression()
-
-    train, val = train.split_stratified(random_state=0)
-
-
-    cls.fit(*train.Xy)
-    y_val = val.labels
-    y_hat_val = cls.predict(val.instances)
-
-    for sample in APP(test, n_prevalences=11, repeats=1, sample_size=100, return_type='labelled_collection')():
-        print('='*80)
-        y_hat = cls.predict(sample.instances)
-        y = sample.labels
-        if target == 'acc':
-            acc = (y_hat==y).mean()
-        else:
-            acc = f1_score(y, y_hat, zero_division=0)
-
-        q = EMQ(cls)
-        q.fit(train, fit_classifier=False)
-
-        # q = EMQ(cls)
-        # q.fit(train, val_split=val, fit_classifier=False)
-        M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
-        M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
-        p_hat = q.quantify(sample.instances)
-        cont_table_hat = p_hat * M_hat
-
-        tp = cont_table_hat[1,1]
-        tn = cont_table_hat[0,0]
-        fn = cont_table_hat[0,1]
-        fp = cont_table_hat[1,0]
-
-        if target == 'acc':
-            acc_hat = (tp+tn)
-        else:
-            den = (2*tp + fn + fp)
-            if den > 0:
-                acc_hat = 2*tp / den
-            else:
-                acc_hat = 0
-
-        error = abs(acc - acc_hat)
-        errors.append(error)
-
-        print('true_prev: ', sample.prevalence())
-        print('estim_prev: ', p_hat)
-        print('M-true:\n', M_true)
-        print('M-hat:\n', M_hat)
-        print('cont_table:\n', cont_table_hat)
-        print(f'classifier accuracy={acc:.3f}')
-        print(f'estimated accuracy={acc_hat:.3f}')
-        print(f'estimation error={error:.4f}')
-
-print('process end')
-print('='*80)
-print(f'mean error = {np.mean(errors)}')
-print(f'std error = {np.std(errors)}')
-
-
-
-
-
-
diff --git a/accuracy_prediction_via_quantification2.py b/accuracy_prediction_via_quantification2.py
deleted file mode 100644
index 8c3e87b..0000000
--- a/accuracy_prediction_via_quantification2.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import numpy as np
-import scipy.special
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import f1_score
-
-import quapy as qp
-from quapy.protocol import APP
-from quapy.method.aggregative import PACC, ACC, EMQ, PCC, CC, DMy, T50, MS2, KDEyML, KDEyCS, KDEyHD
-from sklearn import clone
-import quapy.functional as F
-
-# datasets = qp.datasets.UCI_DATASETS
-datasets = ['imdb']
-
-# target = 'f1'
-target = 'acc'
-
-errors = []
-
-def method_1(cls, train, val, sample, y=None, y_hat=None):
-    """
-    Converts a misclassification matrix computed in validation (i.e., in the train distribution P) into
-    the corresponding equivalent misclassification matrix in test (i.e., in the test distribution Q)
-    by relying on the PPS assumptions.
-
-    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
-    """
-
-    y_val = val.labels
-    y_hat_val = cls.predict(val.instances)
-
-    # q = EMQ(LogisticRegression(class_weight='balanced'))
-    # q.fit(val, fit_classifier=True)
-    q = EMQ(cls)
-    q.fit(train, fit_classifier=False)
-
-
-    # q = KDEyML(cls)
-    # q.fit(train, val_split=val, fit_classifier=False)
-    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
-    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
-    p_hat = q.quantify(sample.instances)
-    cont_table_hat = p_hat * M_hat
-    # cont_table_hat = np.clip(cont_table_hat, 0, 1)
-    # cont_table_hat = cont_table_hat / cont_table_hat.sum()
-
-    print('true_prev: ', sample.prevalence())
-    print('estim_prev: ', p_hat)
-    print('M-true:\n', M_true)
-    print('M-hat:\n', M_hat)
-    print('cont_table:\n', cont_table_hat)
-    print('cont_table Sum :\n', cont_table_hat.sum())
-
-    tp = cont_table_hat[1, 1]
-    tn = cont_table_hat[0, 0]
-    fn = cont_table_hat[0, 1]
-    fp = cont_table_hat[1, 0]
-
-    return tn, fn, fp, tp
-
-
-def method_2(cls, train, val, sample, y=None, y_hat=None):
-    """
-    Assume P and Q are the training and test distributions
-    Solves the following system of linear equations:
-    tp + fp = CC (the classify & count estimate, observed)
-    fn + tp = Q(Y=1) (this is not observed but is estimated via quantification)
-    tp + fp + fn + tn = 1 (trivial)
-
-    There are 4 unknowns and 3 equations. The fourth required one is established
-    by assuming that the PPS conditions hold, i.e., that P(X|Y)=Q(X|Y); note that
-    this implies P(hatY|Y)=Q(hatY|Y) if hatY is computed by any measurable function.
-    In particular, we consider that the tpr in P (estimated via validation, hereafter tpr) and
-    in Q (unknown, hereafter tpr_Q) should
-    be the same. This means:
-    tpr = tpr_Q = tp / (tp + fn)
-    after some manipulation:
-    tp (tpr-1) + fn (tpr) = 0 <-- our last equation
-
-    Note that the last equation relies on the estimate tpr. It is likely that, the more
-    positives we have, the more reliable this estimate is. This suggests that, in cases
-    in which we have more negatives in the validation set than positives, it might be
-    convenient to resort to the true negative rate (tnr) instead. This gives rise to
-    the alternative fourth equation:
-    tn (tnr-1) + fp (tnr) = 0
-
-    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
-    """
-
-    y_val = val.labels
-    y_hat_val = cls.predict(val.instances)
-
-    q = ACC(cls)
-    q.fit(train, val_split=val, fit_classifier=False)
-    p_hat = q.quantify(sample.instances)
-    pos_prev = p_hat[1]
-    # pos_prev = sample.prevalence()[1]
-
-    cc = CC(cls)
-    cc.fit(train, fit_classifier=False)
-    cc_prev = cc.quantify(sample.instances)[1]
-
-    M_hat = ACC.getPteCondEstim(train.classes_, y_val, y_hat_val)
-    M_true = ACC.getPteCondEstim(train.classes_, y, y_hat)
-    cont_table_true = sample.prevalence() * M_true
-
-    if val.prevalence()[1] > 0.5:
-
-        # in this case, the tpr might be a more reliable estimate than tnr
-        tpr_hat = M_hat[1, 1]
-
-        A = np.asarray([
-            [0, 0, 1, 1],
-            [0, 1, 0, 1],
-            [1, 1, 1, 1],
-            [0, tpr_hat, 0, tpr_hat - 1]
-        ])
-
-    else:
-
-        # in this case, the tnr might be a more reliable estimate than tpr
-        tnr_hat = M_hat[0, 0]
-
-        A = np.asarray([
-            [0, 0, 1, 1],
-            [0, 1, 0, 1],
-            [1, 1, 1, 1],
-            [tnr_hat-1, 0, tnr_hat, 0]
-        ])
-
-    b = np.asarray(
-        [cc_prev, pos_prev, 1, 0]
-    )
-
-    tn, fn, fp, tp = np.linalg.solve(A, b)
-
-    cont_table_estim = np.asarray([
-        [tn, fn],
-        [fp, tp]
-    ])
-
-    # if (cont_table_estim < 0).any() or (cont_table_estim>1).any():
-    #     cont_table_estim = scipy.special.softmax(cont_table_estim)
-
-    print('true_prev: ', sample.prevalence())
-    print('estim_prev: ', p_hat)
-    print('true_cont_table:\n', cont_table_true)
-    print('estim_cont_table:\n', cont_table_estim)
-    # print('true_tpr', M_true[1,1])
-    # print('estim_tpr', tpr_hat)
-
-
-    return tn, fn, fp, tp
-
-
-def method_3(cls, train, val, sample, y=None, y_hat=None):
-    """
-    This is just method 2 but without involving any quapy's quantifier.
-
-    :return: tuple (tn, fn, fp, tp,) of floats in [0,1] summing up to 1
-    """
-
-    classes = val.classes_
-    y_val = val.labels
-    y_hat_val = cls.predict(val.instances)
-    M_hat = ACC.getPteCondEstim(classes, y_val, y_hat_val)
-    y_hat_test = cls.predict(sample.instances)
-    pos_prev_cc = F.prevalence_from_labels(y_hat_test, classes)[1]
-    tpr_hat = M_hat[1,1]
-    fpr_hat = M_hat[1,0]
-    tnr_hat = M_hat[0,0]
-    pos_prev_test_hat = (pos_prev_cc - fpr_hat) / (tpr_hat - fpr_hat)
-    pos_prev_test_hat = np.clip(pos_prev_test_hat, 0, 1)
-    pos_prev_val = val.prevalence()[1]
-
-    if pos_prev_val > 0.5:
-        # in this case, the tpr might be a more reliable estimate than tnr
-        A = np.asarray([
-            [0, 0, 1, 1],
-            [0, 1, 0, 1],
-            [1, 1, 1, 1],
-            [0, tpr_hat, 0, tpr_hat - 1]
-        ])
-    else:
-        # in this case, the tnr might be a more reliable estimate than tpr
-        A = np.asarray([
-            [0, 0, 1, 1],
-            [0, 1, 0, 1],
-            [1, 1, 1, 1],
-            [tnr_hat-1, 0, tnr_hat, 0]
-        ])
-
-    b = np.asarray(
-        [pos_prev_cc, pos_prev_test_hat, 1, 0]
-    )
-
-    tn, fn, fp, tp = np.linalg.solve(A, b)
-
-    return tn, fn, fp, tp
-
-
-def cls_eval_from_counters(tn, fn, fp, tp):
-    if target == 'acc':
-        acc_hat = (tp + tn)
-    else:
-        den = (2 * tp + fn + fp)
-        if den > 0:
-            acc_hat = 2 * tp / den
-        else:
-            acc_hat = 0
-    return acc_hat
-
-
-def cls_eval_from_labels(y, y_hat):
-    if target == 'acc':
-        acc = (y_hat == y).mean()
-    else:
-        acc = f1_score(y, y_hat, zero_division=0)
-    return acc
-
-
-for dataset_name in datasets:
-
-    train_orig, test = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=10).train_test
-
-    train_prot = APP(train_orig, n_prevalences=11, repeats=1, return_type='labelled_collection', random_state=0, sample_size=10000)
-    for train in train_prot():
-        if np.product(train.prevalence()) == 0:
-            # skip experiments with no positives or no negatives in training
-            continue
-
-        cls = LogisticRegression(class_weight='balanced')
-
-        train, val = train.split_stratified(train_prop=0.5, random_state=0)
-
-        print(f'dataset name = {dataset_name}')
-        print(f'#train = {len(train)}, prev={F.strprev(train.prevalence())}')
-        print(f'#val = {len(val)}, prev={F.strprev(val.prevalence())}')
-        print(f'#test = {len(test)}, prev={F.strprev(test.prevalence())}')
-
-        cls.fit(*train.Xy)
-
-        for sample in APP(test, n_prevalences=21, repeats=10, sample_size=1000, return_type='labelled_collection')():
-            print('='*80)
-            y_hat = cls.predict(sample.instances)
-            y = sample.labels
-            acc_true = cls_eval_from_labels(y, y_hat)
-
-            tn, fn, fp, tp = method_3(cls, train, val, sample, y, y_hat)
-
-            acc_hat = cls_eval_from_counters(tn, fn, fp, tp)
-
-            error = abs(acc_true - acc_hat)
-            errors.append(error)
-
-            print(f'classifier accuracy={acc_true:.3f}')
-            print(f'estimated accuracy={acc_hat:.3f}')
-            print(f'estimation error={error:.4f}')
-
-print('process end')
-print('='*80)
-print(f'mean error = {np.mean(errors)}')
-print(f'std error = {np.std(errors)}')
-
-
-
-
-
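The removed method_1 estimates the test contingency table by multiplying a quantifier's estimate of the test prevalence with the class-conditional misclassification rates measured on a validation split, assuming those rates transfer under prior probability shift (PPS). The following is a minimal, NumPy-only sketch of that computation; the prevalence vector and misclassification matrix below are made-up illustrative values, not outputs of the removed experiments.

```python
import numpy as np

# M_hat[i, j] ~= P(y_pred = i | y = j), estimated on held-out validation data
# (columns sum to 1); hypothetical values for a reasonably accurate classifier.
M_hat = np.asarray([
    [0.90, 0.15],   # P(pred=0 | y=0), P(pred=0 | y=1)
    [0.10, 0.85],   # P(pred=1 | y=0), P(pred=1 | y=1)
])

# p_hat[j] ~= Q(y = j), the test prevalence as a quantifier (e.g. EMQ) might estimate it
p_hat = np.asarray([0.30, 0.70])

# Under PPS, P(y_pred | y) is assumed to carry over to the test distribution, so the
# estimated joint contingency table scales each column j of M_hat by p_hat[j].
cont_table_hat = M_hat * p_hat          # entry (i, j) ~= Q(y_pred = i, y = j)

tn = cont_table_hat[0, 0]
fn = cont_table_hat[0, 1]
fp = cont_table_hat[1, 0]
tp = cont_table_hat[1, 1]

acc_hat = tp + tn                        # estimated accuracy
f1_hat = 2 * tp / (2 * tp + fn + fp)     # estimated F1 (denominator > 0 here)

print(f'estimated accuracy = {acc_hat:.3f}')
print(f'estimated F1       = {f1_hat:.3f}')
```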
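The removed method_2 and method_3 instead recover the four contingency-table cells by solving the linear system spelled out in the method_2 docstring: the classify-and-count estimate fixes fp + tp, an estimated positive prevalence fixes fn + tp, the cells sum to one, and a tpr (or tnr, when negatives dominate the validation split) estimated on validation supplies the fourth equation under the PPS assumption. A minimal sketch with made-up estimates in place of the quantifier and validation outputs:

```python
import numpy as np

cc_prev = 0.62       # classify-and-count estimate of Q(y_pred = 1) (observed)
pos_prev_hat = 0.70  # quantifier's estimate of Q(y = 1)
tpr_hat = 0.85       # tpr estimated on validation, assumed to transfer under PPS

# Unknowns ordered as (tn, fn, fp, tp); the four equations are:
#   fp + tp             = cc_prev        (classify & count)
#   fn + tp             = pos_prev_hat   (estimated positive prevalence)
#   tn + fn + fp + tp   = 1              (trivial)
#   tpr*fn + (tpr-1)*tp = 0              (tpr = tp / (tp + fn), rearranged)
A = np.asarray([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
    [1, 1, 1, 1],
    [0, tpr_hat, 0, tpr_hat - 1],
])
b = np.asarray([cc_prev, pos_prev_hat, 1, 0])

tn, fn, fp, tp = np.linalg.solve(A, b)

print(f'tn={tn:.3f} fn={fn:.3f} fp={fp:.3f} tp={tp:.3f}')
print(f'estimated accuracy = {tp + tn:.3f}')
```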