collapse_false added, confidence refactored
This commit is contained in:
parent
156477ce0e
commit
22601a1952
|
@ -8,6 +8,7 @@ from quapy.data import LabelledCollection
|
||||||
from quapy.method.aggregative import BaseQuantifier
|
from quapy.method.aggregative import BaseQuantifier
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
|
|
||||||
|
import quacc.method.confidence as conf
|
||||||
from quacc.data import ExtendedCollection, ExtendedData, ExtensionPolicy
|
from quacc.data import ExtendedCollection, ExtendedData, ExtensionPolicy
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,10 +17,11 @@ class BaseAccuracyEstimator(BaseQuantifier):
|
||||||
self,
|
self,
|
||||||
classifier: BaseEstimator,
|
classifier: BaseEstimator,
|
||||||
quantifier: BaseQuantifier,
|
quantifier: BaseQuantifier,
|
||||||
|
collapse_false=False,
|
||||||
):
|
):
|
||||||
self.__check_classifier(classifier)
|
self.__check_classifier(classifier)
|
||||||
self.quantifier = quantifier
|
self.quantifier = quantifier
|
||||||
self.extpol = ExtensionPolicy()
|
self.extpol = ExtensionPolicy(collapse_false=collapse_false)
|
||||||
|
|
||||||
def __check_classifier(self, classifier):
|
def __check_classifier(self, classifier):
|
||||||
if not hasattr(classifier, "predict_proba"):
|
if not hasattr(classifier, "predict_proba"):
|
||||||
|
@ -36,10 +38,8 @@ class BaseAccuracyEstimator(BaseQuantifier):
|
||||||
coll, pred_proba=pred_proba, extpol=self.extpol
|
coll, pred_proba=pred_proba, extpol=self.extpol
|
||||||
)
|
)
|
||||||
|
|
||||||
def _extend_instances(self, instances: np.ndarray | sp.csr_matrix, pred_proba=None):
|
def _extend_instances(self, instances: np.ndarray | sp.csr_matrix):
|
||||||
if pred_proba is None:
|
|
||||||
pred_proba = self.classifier.predict_proba(instances)
|
pred_proba = self.classifier.predict_proba(instances)
|
||||||
|
|
||||||
return ExtendedData(instances, pred_proba=pred_proba, extpol=self.extpol)
|
return ExtendedData(instances, pred_proba=pred_proba, extpol=self.extpol)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -50,16 +50,26 @@ class BaseAccuracyEstimator(BaseQuantifier):
|
||||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@property
|
||||||
|
def collapse_false(self):
|
||||||
|
return self.extpol.collapse_false
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceBasedAccuracyEstimator(BaseAccuracyEstimator):
|
class ConfidenceBasedAccuracyEstimator(BaseAccuracyEstimator):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
classifier: BaseEstimator,
|
classifier: BaseEstimator,
|
||||||
quantifier: BaseQuantifier,
|
quantifier: BaseQuantifier,
|
||||||
|
collapse_false=False,
|
||||||
confidence=None,
|
confidence=None,
|
||||||
):
|
):
|
||||||
super().__init__(classifier, quantifier)
|
super().__init__(
|
||||||
|
classifier=classifier,
|
||||||
|
quantifier=quantifier,
|
||||||
|
collapse_false=collapse_false,
|
||||||
|
)
|
||||||
self.__check_confidence(confidence)
|
self.__check_confidence(confidence)
|
||||||
|
self.calibrator = None
|
||||||
|
|
||||||
def __check_confidence(self, confidence):
|
def __check_confidence(self, confidence):
|
||||||
if isinstance(confidence, str):
|
if isinstance(confidence, str):
|
||||||
|
@ -69,46 +79,37 @@ class ConfidenceBasedAccuracyEstimator(BaseAccuracyEstimator):
|
||||||
else:
|
else:
|
||||||
self.confidence = None
|
self.confidence = None
|
||||||
|
|
||||||
def __get_confidence(self):
|
def _fit_confidence(self, X, y, probas):
|
||||||
def max_conf(probas):
|
self.confidence_metrics = conf.get_metrics(self.confidence)
|
||||||
_mc = np.max(probas, axis=-1)
|
if self.confidence_metrics is None:
|
||||||
_min = 1.0 / probas.shape[1]
|
return
|
||||||
_norm_mc = (_mc - _min) / (1.0 - _min)
|
|
||||||
return _norm_mc
|
|
||||||
|
|
||||||
def entropy(probas):
|
for m in self.confidence_metrics:
|
||||||
_ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
|
m.fit(X, y, probas)
|
||||||
return _ent
|
|
||||||
|
|
||||||
if self.confidence is None:
|
def _get_pred_ext(self, pred_proba: np.ndarray):
|
||||||
return []
|
return pred_proba
|
||||||
|
|
||||||
__confs = {
|
def __get_ext(
|
||||||
"max_conf": max_conf,
|
self, X: np.ndarray | sp.csr_matrix, pred_proba: np.ndarray
|
||||||
"entropy": entropy,
|
) -> np.ndarray:
|
||||||
}
|
if self.confidence_metrics is None or len(self.confidence_metrics) == 0:
|
||||||
return [__confs.get(c, None) for c in self.confidence]
|
return pred_proba
|
||||||
|
|
||||||
def __get_ext(self, pred_proba: np.ndarray) -> np.ndarray:
|
_conf_ext = np.concatenate(
|
||||||
__confidence = self.__get_confidence()
|
[m.conf(X, pred_proba) for m in self.confidence_metrics],
|
||||||
|
|
||||||
if __confidence is None or len(__confidence) == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return np.concatenate(
|
|
||||||
[
|
|
||||||
_f_conf(pred_proba).reshape((len(pred_proba), 1))
|
|
||||||
for _f_conf in __confidence
|
|
||||||
if _f_conf is not None
|
|
||||||
],
|
|
||||||
axis=1,
|
axis=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_pred_ext = self._get_pred_ext(pred_proba)
|
||||||
|
|
||||||
|
return np.concatenate([_conf_ext, _pred_ext], axis=1)
|
||||||
|
|
||||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||||
if pred_proba is None:
|
if pred_proba is None:
|
||||||
pred_proba = self.classifier.predict_proba(coll.X)
|
pred_proba = self.classifier.predict_proba(coll.X)
|
||||||
|
|
||||||
_ext = self.__get_ext(pred_proba)
|
_ext = self.__get_ext(coll.X, pred_proba)
|
||||||
return ExtendedCollection.from_lc(
|
return ExtendedCollection.from_lc(
|
||||||
coll, pred_proba=pred_proba, ext=_ext, extpol=self.extpol
|
coll, pred_proba=pred_proba, ext=_ext, extpol=self.extpol
|
||||||
)
|
)
|
||||||
|
@ -116,12 +117,9 @@ class ConfidenceBasedAccuracyEstimator(BaseAccuracyEstimator):
|
||||||
def _extend_instances(
|
def _extend_instances(
|
||||||
self,
|
self,
|
||||||
instances: np.ndarray | sp.csr_matrix,
|
instances: np.ndarray | sp.csr_matrix,
|
||||||
pred_proba=None,
|
|
||||||
) -> ExtendedData:
|
) -> ExtendedData:
|
||||||
if pred_proba is None:
|
|
||||||
pred_proba = self.classifier.predict_proba(instances)
|
pred_proba = self.classifier.predict_proba(instances)
|
||||||
|
_ext = self.__get_ext(instances, pred_proba)
|
||||||
_ext = self.__get_ext(pred_proba)
|
|
||||||
return ExtendedData(
|
return ExtendedData(
|
||||||
instances, pred_proba=pred_proba, ext=_ext, extpol=self.extpol
|
instances, pred_proba=pred_proba, ext=_ext, extpol=self.extpol
|
||||||
)
|
)
|
||||||
|
@ -139,12 +137,17 @@ class MultiClassAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
classifier=classifier,
|
classifier=classifier,
|
||||||
quantifier=quantifier,
|
quantifier=quantifier,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
|
collapse_false=collapse_false,
|
||||||
)
|
)
|
||||||
self.e_train = None
|
self.e_train = None
|
||||||
self.extpol = ExtensionPolicy(collapse_false=collapse_false)
|
|
||||||
|
def _get_pred_ext(self, pred_proba: np.ndarray):
|
||||||
|
return np.argmax(pred_proba, axis=1, keepdims=True)
|
||||||
|
|
||||||
def fit(self, train: LabelledCollection):
|
def fit(self, train: LabelledCollection):
|
||||||
self.e_train = self.extend(train)
|
pred_proba = self.classifier.predict_proba(train.X)
|
||||||
|
self._fit_confidence(train.X, train.y, pred_proba)
|
||||||
|
self.e_train = self.extend(train, pred_proba=pred_proba)
|
||||||
|
|
||||||
self.quantifier.fit(self.e_train)
|
self.quantifier.fit(self.e_train)
|
||||||
|
|
||||||
|
@ -173,10 +176,6 @@ class MultiClassAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
||||||
return estim_prev
|
return estim_prev
|
||||||
|
|
||||||
@property
|
|
||||||
def collapse_false(self):
|
|
||||||
return self.extpol.collapse_false
|
|
||||||
|
|
||||||
|
|
||||||
class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -184,6 +183,7 @@ class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
classifier: BaseEstimator,
|
classifier: BaseEstimator,
|
||||||
quantifier: BaseAccuracyEstimator,
|
quantifier: BaseAccuracyEstimator,
|
||||||
confidence: str = None,
|
confidence: str = None,
|
||||||
|
collapse_false=False,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
classifier=classifier,
|
classifier=classifier,
|
||||||
|
@ -193,7 +193,9 @@ class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
self.quantifiers = []
|
self.quantifiers = []
|
||||||
|
|
||||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||||
self.e_train = self.extend(train)
|
pred_proba = self.classifier.predict_proba(train.X)
|
||||||
|
self._fit_confidence(train.X, train.y, pred_proba)
|
||||||
|
self.e_train = self.extend(train, pred_proba=pred_proba)
|
||||||
|
|
||||||
self.n_classes = self.e_train.n_classes
|
self.n_classes = self.e_train.n_classes
|
||||||
e_trains = self.e_train.split_by_pred()
|
e_trains = self.e_train.split_by_pred()
|
||||||
|
@ -217,7 +219,8 @@ class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
norms = [s_i.shape[0] / len(e_inst) for s_i in s_inst]
|
norms = [s_i.shape[0] / len(e_inst) for s_i in s_inst]
|
||||||
estim_prevs = self._quantify_helper(s_inst, norms)
|
estim_prevs = self._quantify_helper(s_inst, norms)
|
||||||
|
|
||||||
estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
# estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
||||||
|
estim_prev = np.concatenate(estim_prevs.T)
|
||||||
return estim_prev
|
return estim_prev
|
||||||
|
|
||||||
def _quantify_helper(
|
def _quantify_helper(
|
||||||
|
@ -232,7 +235,7 @@ class BinaryQuantifierAccuracyEstimator(ConfidenceBasedAccuracyEstimator):
|
||||||
else:
|
else:
|
||||||
estim_prevs.append(np.asarray([0.0, 0.0]))
|
estim_prevs.append(np.asarray([0.0, 0.0]))
|
||||||
|
|
||||||
return estim_prevs
|
return np.array(estim_prevs)
|
||||||
|
|
||||||
|
|
||||||
BAE = BaseAccuracyEstimator
|
BAE = BaseAccuracyEstimator
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import scipy.sparse as sp
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
|
||||||
|
import baselines.atc as atc
|
||||||
|
|
||||||
|
# Module-level registry: metric name -> class registered under that name.
__confs = {}


def metric(name):
    """Build a class decorator that records the decorated class under ``name``.

    The class is stored in the module-level registry and returned unchanged,
    so decorating has no effect on the class itself. Registering another
    class under an already-used name overwrites the earlier entry.
    """

    def register(cls):
        __confs[name] = cls
        return cls

    return register
|
||||||
|
|
||||||
|
|
||||||
|
class ConfidenceMetric:
    """Default confidence metric: nothing to fit, probabilities passed through.

    Subclasses in this module override ``fit`` and/or ``conf``.
    """

    def fit(self, X, y, probas):
        """Do nothing; this default implementation has no state to learn."""
        return None

    def conf(self, X, probas):
        """Return ``probas`` itself, unchanged; ``X`` is not used."""
        return probas
|
||||||
|
|
||||||
|
|
||||||
|
@metric("max_conf")
class MaxConf(ConfidenceMetric):
    """Confidence given by the largest value in each row of ``probas``."""

    def conf(self, X, probas):
        """Return the row-wise maximum of ``probas``.

        ``X`` is not used. The reduced axis is kept (``keepdims=True``), so
        the result has one column per row of ``probas``.
        """
        return np.max(probas, axis=1, keepdims=True)
|
||||||
|
|
||||||
|
|
||||||
|
@metric("entropy")
class Entropy(ConfidenceMetric):
    """Row-wise ``sum(p * log(p))`` of the predicted probabilities.

    The sum is not negated, so for rows of probabilities the value is the
    Shannon entropy with the opposite sign.
    """

    def conf(self, X, probas):
        """Return ``sum(p * log(p + 1e-20))`` per row, as a single column.

        ``X`` is not used.
        """
        # The 1e-20 offset keeps the logarithm finite for zero probabilities.
        log_probas = np.log(probas + 1e-20)
        weighted = np.multiply(probas, log_probas)
        return np.sum(weighted, axis=1, keepdims=True)
|
||||||
|
|
||||||
|
|
||||||
|
@metric("isoft")
class InverseSoftmax(ConfidenceMetric):
    """Largest mean-centred log-probability of each row of ``probas``."""

    def conf(self, X, probas):
        """Return the maximum centred log-probability per row, as a column.

        Each row is first rescaled to sum to one, then its logarithm is
        centred by subtracting the row mean; the row maximum of the centred
        values is returned with the reduced axis kept. ``X`` is not used.
        """
        normed = probas / np.sum(probas, axis=1, keepdims=True)
        # Without an offset, a zero probability gives log(0) = -inf, the row
        # mean becomes -inf and (-inf) - (-inf) turns the whole row into NaN.
        # The 1e-20 offset is the same one the entropy metric uses.
        log_probas = np.log(normed + 1e-20)
        centred = log_probas - np.mean(log_probas, axis=1, keepdims=True)
        return np.max(centred, axis=1, keepdims=True)
|
||||||
|
|
||||||
|
|
||||||
|
@metric("threshold")
class Threshold(ConfidenceMetric):
    """Signed distance of the maximum probability from a fitted threshold."""

    def get_scores(self, probas, keepdims=False):
        """Return the row-wise maximum of ``probas``.

        ``keepdims`` is forwarded to ``np.max`` and controls whether the
        reduced axis is kept.
        """
        return np.max(probas, axis=1, keepdims=keepdims)

    def fit(self, X, y, probas):
        """Store the threshold computed by ``atc.find_ATC_threshold``.

        ``X`` is not used. Only the second element of the helper's result
        is kept, as ``self.threshold``.
        """
        # NOTE(review): ``y`` is forwarded to the helper unchanged. The helper
        # is defined outside this file; confirm whether it expects class
        # labels or per-sample correctness flags.
        max_scores = self.get_scores(probas)
        _unused, fitted_threshold = atc.find_ATC_threshold(max_scores, y)
        self.threshold = fitted_threshold

    def conf(self, X, probas):
        """Return ``max(probas) - threshold`` per row, as a single column.

        ``X`` is not used. ``self.threshold`` is only set by ``fit``.
        """
        return self.get_scores(probas, keepdims=True) - self.threshold
|
||||||
|
|
||||||
|
|
||||||
|
@metric("linreg")
class LinReg(ConfidenceMetric):
    """Confidence predicted by a linear regression over ``[X, probas]``."""

    def extend(self, X, probas):
        """Append the columns of ``probas`` to ``X``.

        Uses ``sp.hstack`` when ``X`` is a SciPy sparse matrix and
        ``np.concatenate`` along axis 1 otherwise.
        """
        if not sp.issparse(X):
            return np.concatenate([X, probas], axis=1)
        return sp.hstack([X, probas])

    def fit(self, X, y, probas):
        """Fit the regressor on ``[X, probas]``.

        The regression target for row ``i`` is ``probas[i, y[i]]``, so ``y``
        is used as a column index into ``probas``. The fitted model is stored
        in ``self.reg``.
        """
        features = self.extend(X, probas)
        row_idx = np.arange(probas.shape[0])
        targets = probas[row_idx, y]
        self.reg = LinearRegression()
        self.reg.fit(features, targets)

    def conf(self, X, probas):
        """Return the regressor's predictions for ``[X, probas]`` as a column."""
        predicted = self.reg.predict(self.extend(X, probas))
        # Adding a trailing axis turns the predictions into a single column.
        return predicted[:, None]
|
||||||
|
|
||||||
|
|
||||||
|
def get_metrics(names: List[str]):
    """Instantiate the registered metric class for each known name in ``names``.

    Returns ``None`` when ``names`` is ``None``. Otherwise returns a list with
    one new instance per name found in the registry, in the order given;
    names that are not registered are skipped silently.
    """
    if names is None:
        return None

    return [__confs[name]() for name in names if name in __confs]
|
Loading…
Reference in New Issue