# QuaPy/ClassifierAccuracy/deprecated/models_binary.py
from abc import ABC, abstractmethod

import numpy as np
import scipy
from scipy.sparse import issparse, csr_matrix
from sklearn import clone
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

import quapy as qp
from quapy.data import LabelledCollection

class ConfusionMatrixPredictor(ABC):
    """
    Abstract class of predictors of the confusion matrix of a classifier.
    In the binary case, this amounts to predicting the 4-cell contingency table consisting of the
    true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) that
    most evaluation metrics make use of.
    """

    @abstractmethod
    def fit(self, train: LabelledCollection):
        pass

    @abstractmethod
    def predict(self, test):
        pass
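
# Illustrative sketch (not part of the original module): the 4-cell table named in the
# docstring above, in scikit-learn's layout. For binary labels [0, 1],
# `sklearn.metrics.confusion_matrix` returns [[TN, FP], [FN, TP]]:
#
#   y_true = np.array([0, 0, 1, 1])
#   y_pred = np.array([0, 1, 0, 1])
#   cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
#   # cm -> [[1, 1],    row 0: TN=1, FP=1
#   #        [1, 1]]    row 1: FN=1, TP=1
#   tn, fp, fn, tp = cm.ravel()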

class MLCMEstimator(ConfusionMatrixPredictor):
    """
    The Maximum Likelihood Confusion Matrix (MLCM) Estimator is a method that relies on the IID
    assumption, and thus computes, via k-FCV (or any other technique), the counters of the confusion
    matrix, assuming that those are good estimates for the test case.
    """

    def __init__(self, classifier, strategy='kfcv', **kwargs):
        assert strategy in ['kfcv'], 'unknown strategy'
        if strategy == 'kfcv':
            assert 'k' in kwargs, 'strategy "kfcv" requires "k" to be passed as an argument'
        self.classifier = classifier
        self.strategy = strategy
        self.kwargs = kwargs

    def sout(self, msg):
        if self.kwargs.get('verbose', False):
            print(msg)

    def fit(self, train: LabelledCollection):
        X, y = train.Xy
        if self.strategy == 'kfcv':
            k = self.kwargs['k']
            n_jobs = self.kwargs.get('n_jobs', 1)
            predict = self.kwargs.get('predict', 'predict')
            self.sout(f'{self.__class__.__name__}: '
                      f'running cross_val_predict with k={k} n_jobs={n_jobs} predict={predict}')
            predictions = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method=predict)
            self.conf_matrix = confusion_matrix(y, predictions, labels=train.classes_)
        return self

    def predict(self, test):
        """
        This method disregards the test set, under the assumption that it is IID w.r.t. the training
        set. This means that the confusion matrix for the test data should coincide with the one
        computed on training (using any cross-validation strategy).

        :param test: test collection (ignored)
        :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
        """
        return self.conf_matrix
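
# Usage sketch (hypothetical; assumes a scikit-learn classifier and QuaPy LabelledCollection
# objects named `train` and `test`; not part of the original module):
#
#   from sklearn.linear_model import LogisticRegression
#   estimator = MLCMEstimator(LogisticRegression(), strategy='kfcv', k=5)
#   estimator.fit(train)
#   cm = estimator.predict(test)   # `test` is ignored: the k-FCV estimate is returned as-is
#   tn, fp, fn, tp = cm.ravel()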

class UpperBound(ConfusionMatrixPredictor):
    """
    Oracle predictor: it is given the true test labels, so its "prediction" is the actual confusion
    matrix of the classifier on the test set. It serves as an upper bound against which the other
    predictors can be compared.
    """

    def __init__(self, classifier, y_test):
        self.classifier = classifier
        self.y_test = y_test

    def fit(self, train: LabelledCollection):
        self.classifier.fit(*train.Xy)
        self.classes = train.classes_
        return self

    def show_true_labels(self, y_test):
        self.y_test = y_test

    def predict(self, test):
        predictions = self.classifier.predict(test)
        return confusion_matrix(self.y_test, predictions, labels=self.classes)


def get_counters(y_true, y_pred):
    # assigns to each instance the confusion-matrix cell it falls in:
    # 0=TP, 1=FN, 2=FP, 3=TN (binary labels assumed to be 0/1)
    counters = np.full(shape=y_true.shape, fill_value=-1)
    counters[np.logical_and(y_true == 1, y_pred == 1)] = 0
    counters[np.logical_and(y_true == 1, y_pred == 0)] = 1
    counters[np.logical_and(y_true == 0, y_pred == 1)] = 2
    counters[np.logical_and(y_true == 0, y_pred == 0)] = 3
    class_map = {
        0: 'tp',
        1: 'fn',
        2: 'fp',
        3: 'tn'
    }
    return counters, class_map
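
# Worked example for get_counters (sketch, not part of the original module):
#
#   y_true = np.array([1, 1, 0, 0])
#   y_pred = np.array([1, 0, 1, 0])
#   counters, class_map = get_counters(y_true, y_pred)
#   # counters -> [0, 1, 2, 3], i.e. ['tp', 'fn', 'fp', 'tn'] under class_map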

def safehstack(matrix, posteriors):
    # horizontally stacks features and posteriors, preserving sparsity if the input is sparse
    if issparse(matrix):
        instances = csr_matrix(scipy.sparse.hstack([matrix, posteriors]))
    else:
        instances = np.hstack([matrix, posteriors])
    return instances
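
# Example for safehstack (sketch, not part of the original module):
#
#   X = np.random.rand(10, 5)   # 10 instances, 5 features
#   P = np.random.rand(10, 2)   # posterior probabilities for a binary problem
#   XP = safehstack(X, P)       # dense: shape (10, 7); a csr_matrix is returned if X is sparse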

class QuantificationCMPredictor(ConfusionMatrixPredictor):
    """
    Confusion matrix predictor based on quantification: a quantifier is trained to estimate, on the
    test set, the prevalence of each of the four cells (TP, FN, FP, TN), from which the confusion
    matrix is derived.
    """

    def __init__(self, classifier, quantifier, strategy='kfcv', **kwargs):
        assert strategy in ['kfcv'], 'unknown strategy'
        if strategy == 'kfcv':
            assert 'k' in kwargs, 'strategy "kfcv" requires "k" to be passed as an argument'
        self.classifier = clone(classifier)
        self.quantifier = quantifier
        self.strategy = strategy
        self.kwargs = kwargs

    def sout(self, msg):
        if self.kwargs.get('verbose', False):
            print(msg)

    def fit(self, train: LabelledCollection):
        X, y = train.Xy
        if self.strategy == 'kfcv':
            k = self.kwargs['k']
            n_jobs = self.kwargs.get('n_jobs', 1)
            self.sout(f'{self.__class__.__name__}: '
                      f'running cross_val_predict with k={k} n_jobs={n_jobs}')
            predictions = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method='predict')
            posteriors = cross_val_predict(self.classifier, X, y, cv=k, n_jobs=n_jobs, method='predict_proba')
            self.classifier.fit(X, y)
            # each training instance is labelled with the confusion-matrix cell it falls in, and
            # the quantifier is trained to estimate the prevalence of the four cells
            instances = safehstack(train.instances, posteriors)
            counters, class_map = get_counters(train.labels, predictions)
            q_data = LabelledCollection(instances=instances, labels=counters, classes_=[0, 1, 2, 3])
            self.sout(f'counters prevalence {q_data.counts()}')
            self.quantifier.fit(q_data)
        return self

    def predict(self, test):
        """
        :param test: test instances
        :return: a confusion matrix in the return format of `sklearn.metrics.confusion_matrix`
        """
        posteriors = self.classifier.predict_proba(test)
        instances = safehstack(test, posteriors)
        counters = self.quantifier.quantify(instances)
        tp, fn, fp, tn = counters
        conf_matrix = np.asarray([[tn, fp], [fn, tp]])
        return conf_matrix

    def quantify(self, test):
        posteriors = self.classifier.predict_proba(test)
        instances = safehstack(test, posteriors)
        counters = self.quantifier.quantify(instances)
        tp, fn, fp, tn = counters
        # tpr and fpr as estimated by the quantifier
        den_tpr = (tp + fn)
        tpr = tp / den_tpr if den_tpr > 0 else 1
        den_fpr = (fp + tn)
        fpr = fp / den_fpr if den_fpr > 0 else 0
        # PACC-style correction (computed but not used in the returned value); the PCC estimate
        # is the average posterior probability of the positive class
        pcc = posteriors.mean(axis=0)[1]
        pacc = (pcc - fpr) / (tpr - fpr)
        pacc = np.clip(pacc, 0, 1)
        # the prevalence of the positive class is the joint mass of the TP and FN cells
        q = tp + fn
        return q
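
# End-to-end usage sketch (hypothetical; assumes LabelledCollection objects `train` and `test`
# and a QuaPy aggregative quantifier such as PACC; not part of the original module):
#
#   from sklearn.linear_model import LogisticRegression
#   from quapy.method.aggregative import PACC
#
#   cm_pred = QuantificationCMPredictor(
#       classifier=LogisticRegression(),
#       quantifier=PACC(LogisticRegression()),
#       strategy='kfcv', k=5)
#   cm_pred.fit(train)
#   conf_matrix = cm_pred.predict(test.instances)      # [[tn, fp], [fn, tp]]
#   pos_prevalence = cm_pred.quantify(test.instances)  # estimated prevalence of the positive class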