"""Compare classifier-accuracy-prediction (CAP) methods on the UCI multiclass datasets.

For every dataset a logistic-regression classifier is trained, test samples
are drawn with the UPP protocol, and the accuracy estimated by each CAP
method is collected next to the true accuracy.  The collected values are
finally handed to ``plot_diagonal``.
"""
from collections import defaultdict
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.metrics import confusion_matrix
from method.aggregative import PACC, EMQ
from utils import *
import quapy.data.datasets
import quapy as qp
from models_multiclass import *
from quapy.data import LabelledCollection
from quapy.protocol import UPP
from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS
# Imported after the star imports so these names cannot be shadowed by them.
from typing import Callable, Iterator, Tuple


def split(data: LabelledCollection):
    """Partition ``data`` into training, validation and test collections.

    Two successive stratified splits are applied: the first one uses
    ``train_prop=0.66`` to separate the test part, the second one uses
    ``train_prop=0.5`` to divide the remaining part into training and
    validation.

    :param data: the labelled collection to partition
    :return: a tuple ``(train, val, test)``
    """
    train_val, test = data.split_stratified(train_prop=0.66)
    train, val = train_val.split_stratified(train_prop=0.5)
    return train, val, test


def gen_datasets() -> Iterator[
    Tuple[str, Tuple[LabelledCollection, LabelledCollection, LabelledCollection]]
]:
    """Yield ``(dataset_name, (train, val, test))`` for every UCI multiclass dataset.

    Each dataset listed in ``UCI_MULTICLASS_DATASETS`` is fetched lazily, one
    per iteration, and partitioned with :func:`split`.
    """
    for dataset_name in UCI_MULTICLASS_DATASETS:
        dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
        yield dataset_name, split(dataset)


def gen_CAP(h, acc_fn) -> Iterator[Tuple[str, ClassifierAccuracyPrediction]]:
    """Yield ``(method_name, method)`` pairs for the CAP methods under comparison.

    :param h: the classifier whose accuracy is to be predicted
    :param acc_fn: function mapping a contingency table to an accuracy value
    """
    yield 'Naive', NaiveCAP(h, acc_fn)
    yield 'CT-PPS-PACC', ContTableTransferCAP(h, acc_fn, PACC(LogisticRegression()))
    # This variant receives the PACC class itself, not an instance
    # (presumably it builds the quantifier internally -- confirm in models_multiclass).
    yield 'CT-PPSh-PACC', ContTableWithHTransferCAP(h, acc_fn, PACC)


def true_acc(h: BaseEstimator, acc_fn: Callable, U: LabelledCollection):
    """Compute the accuracy actually attained by ``h`` on the labelled sample ``U``.

    The confusion matrix between the labels of ``U`` and the predictions of
    ``h`` is built over ``U.classes_`` and passed to ``acc_fn``.

    :param h: an already fitted classifier
    :param acc_fn: function mapping a confusion matrix to an accuracy value
    :param U: labelled sample on which ``h`` is evaluated
    :return: whatever ``acc_fn`` returns for the confusion matrix
    """
    y_pred = h.predict(U.X)
    y_true = U.y
    conf_table = confusion_matrix(y_true, y_pred=y_pred, labels=U.classes_)
    return acc_fn(conf_table)


def acc_fn(cont_table):
    """Return the sum of the diagonal of ``cont_table`` divided by the sum of all its cells."""
    return np.diag(cont_table).sum() / cont_table.sum()


qp.environ['SAMPLE_SIZE'] = 100

h = LogisticRegression()

acc_trues = []
acc_predicted = defaultdict(list)

for dataset_name, (L, V, U) in gen_datasets():
    print(dataset_name)

    # The same classifier object is re-fitted on the training part of each dataset.
    h.fit(*L.Xy)

    test_prot = UPP(U, repeats=100, return_type='labelled_collection')

    acc_trues.extend(true_acc(h, acc_fn, Ui) for Ui in test_prot())

    # NOTE(review): test_prot() is invoked again for every method below; pairing
    # acc_trues[i] with acc_predicted[name][i] assumes each invocation yields the
    # same samples in the same order -- confirm the protocol is seeded.
    for method_name, method in gen_CAP(h, acc_fn):
        method.fit(V)

        for Ui in test_prot():
            acc_hat = method.predict(Ui.X)
            acc_predicted[method_name].append(acc_hat)

# plot_diagonal is given a list of (method_name, estimated_accuracies) pairs.
acc_predicted = list(acc_predicted.items())
plot_diagonal('./plots/diagonal.png', acc_trues, acc_predicted)