forked from moreo/QuaPy
Merge branch 'master' of github.com:HLT-ISTI/QuaPy
commit 9f4a9cb3fd
quapy/method
@@ -308,16 +308,27 @@ class ACC(AggregativeQuantifier):
         self.learner, val_data = _training_helper(self.learner, data, fit_learner, val_split=val_split)
         y_ = self.learner.predict(val_data.instances)
         y = val_data.labels
-        class_count = val_data.counts()
 
         self.cc = CC(self.learner)
 
-        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
-        # document that belongs to yj ends up being classified as belonging to yi
-        self.Pte_cond_estim_ = confusion_matrix(y, y_).T / class_count
+        self.Pte_cond_estim_ = self.getPteCondEstim(data.classes_, y, y_)
 
         return self
 
+    @classmethod
+    def getPteCondEstim(cls, classes, y, y_):
+        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
+        # document that belongs to yj ends up being classified as belonging to yi
+        conf = confusion_matrix(y, y_, labels=classes).T
+        conf = conf.astype(np.float)
+        class_counts = conf.sum(axis=0)
+        for i, _ in enumerate(classes):
+            if class_counts[i] == 0:
+                conf[i, i] = 1
+            else:
+                conf[:, i] /= class_counts[i]
+        return conf
+
     def classify(self, data):
         return self.cc.classify(data)
 
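
For context, a minimal sketch of what the new ACC.getPteCondEstim computes and how ACC uses it; the toy labels and the CC estimate below are assumptions for illustration, not part of the commit:

import numpy as np
from sklearn.metrics import confusion_matrix

classes = np.array([0, 1])
y  = np.array([0, 0, 0, 1, 1, 1])    # true validation labels (assumed)
y_ = np.array([0, 0, 1, 1, 1, 0])    # hard classifier predictions (assumed)

# column-normalized confusion matrix: entry (i, j) estimates P(pred=i | true=j)
conf = confusion_matrix(y, y_, labels=classes).T.astype(float)
conf /= conf.sum(axis=0)
print(conf)                          # [[0.667 0.333], [0.333 0.667]]

# ACC then solves conf @ p = p_cc for the true prevalence p, which is
# essentially what ACC.solve_adjustment does (plus clipping/normalization)
p_cc = np.array([0.8, 0.2])          # hypothetical CC estimate on test data
print(np.linalg.solve(conf, p_cc))   # [ 1.4 -0.4], later clipped and normalized
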
@@ -439,10 +450,23 @@ class PACC(AggregativeProbabilisticQuantifier):
         for i, class_ in enumerate(classes):
             confusion[i] = y_[y == class_].mean(axis=0)
 
-        self.Pte_cond_estim_ = confusion.T
+        self.Pte_cond_estim_ = self.getPteCondEstim(classes, y, y_)
 
         return self
 
+    @classmethod
+    def getPteCondEstim(cls, classes, y, y_):
+        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
+        # document that belongs to yj ends up being classified as belonging to yi
+        n_classes = len(classes)
+        confusion = np.eye(n_classes)
+        for i, class_ in enumerate(classes):
+            idx = y == class_
+            if idx.any():
+                confusion[i] = y_[idx].mean(axis=0)
+
+        return confusion.T
+
     def aggregate(self, classif_posteriors):
         prevs_estim = self.pcc.aggregate(classif_posteriors)
         return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
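
Similarly, a small sketch of PACC's new getPteCondEstim, which averages soft posteriors instead of counting hard predictions; the posteriors below are assumptions. Column j holds the mean posterior vector among validation items whose true class is j, and the identity initialization together with the new idx.any() guard keeps the matrix well-defined for classes absent from the validation split:

import numpy as np

classes = np.array([0, 1])
y  = np.array([0, 0, 1, 1])          # true validation labels (assumed)
y_ = np.array([[0.9, 0.1],           # P(c|x), one row per instance (assumed)
               [0.7, 0.3],
               [0.2, 0.8],
               [0.4, 0.6]])

confusion = np.eye(len(classes))     # rows default to the identity
for i, class_ in enumerate(classes):
    idx = y == class_
    if idx.any():                    # guard introduced by this commit
        confusion[i] = y_[idx].mean(axis=0)
print(confusion.T)                   # [[0.8 0.3], [0.2 0.7]]
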
@@ -458,15 +482,25 @@ class EMQ(AggregativeProbabilisticQuantifier):
     EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior
     probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
     maximum-likelihood estimation, in a mutually recursive way, until convergence.
+    The `transform_prior` callback allows you to introduce ad-hoc regularizations which are not part of the
+    original EMQ algorithm. This callback can, for instance, enhance or diminish small class prevalences if
+    sparse or dense solutions should be promoted.
+
+    The original method is described in:
+    Saerens, M., Latinne, P., and Decaestecker, C. (2002).
+    Adjusting the outputs of a classifier to new a priori probabilities: A simple procedure.
+    Neural Computation, 14(1): 21–41.
 
     :param learner: a sklearn Estimator that generates a classifier
+    :param transform_prior: an optional function :math:`R^c -> R^c` that transforms each intermediate estimate
     """
 
     MAX_ITER = 1000
     EPSILON = 1e-4
 
-    def __init__(self, learner: BaseEstimator):
+    def __init__(self, learner: BaseEstimator, transform_prior=None):
         self.learner = learner
+        self.transform_prior = transform_prior
 
     def fit(self, data: LabelledCollection, fit_learner=True):
         self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
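
To make the new parameter concrete, here is a hypothetical transform_prior callback; the power transform and its name are assumptions for illustration, not part of QuaPy:

import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import EMQ

def sparse_prior(qs, power=2.0):
    # raising the running estimate to a power > 1 and renormalizing
    # boosts large prevalences and shrinks small ones (a sparse solution)
    qs = np.asarray(qs, dtype=float) ** power
    return qs / qs.sum()

emq = EMQ(LogisticRegression(), transform_prior=sparse_prior)
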
@@ -474,27 +508,28 @@ class EMQ(AggregativeProbabilisticQuantifier):
         return self
 
     def aggregate(self, classif_posteriors, epsilon=EPSILON):
-        priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon)
+        priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon, self.transform_prior)
         return priors
 
     def predict_proba(self, instances, epsilon=EPSILON):
         classif_posteriors = self.learner.predict_proba(instances)
-        priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon)
+        priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon, self.transform_prior)
         return posteriors
 
     @classmethod
-    def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON):
+    def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON, transform_prior=None):
         """
         Computes the `Expectation Maximization` routine.
 
         :param tr_prev: array-like, the training prevalence
         :param posterior_probabilities: `np.ndarray` of shape `(n_instances, n_classes,)` with the
             posterior probabilities
         :param epsilon: float, the threshold difference between two consecutive iterations
             to reach before stopping the loop
+        :param transform_prior: an optional function :math:`R^c -> R^c` that transforms each intermediate estimate
         :return: a tuple with the estimated prevalence values (shape `(n_classes,)`) and
             the corrected posterior probabilities (shape `(n_instances, n_classes,)`)
         """
         Px = posterior_probabilities
         Ptr = np.copy(tr_prev)
         qs = np.copy(Ptr)  # qs (the running estimate) is initialized as the training prevalence
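
The classmethod can also be called directly, per the signature above; a quick sketch with made-up inputs (all numbers are assumptions):

import numpy as np
from quapy.method.aggregative import EMQ

tr_prev = np.array([0.5, 0.5])                 # training prevalence (assumed)
posteriors = np.array([[0.8, 0.2],             # test posteriors P(c|x) (assumed)
                       [0.7, 0.3],
                       [0.9, 0.1],
                       [0.4, 0.6]])

priors, corrected = EMQ.EM(tr_prev, posteriors, epsilon=1e-4)
print(priors.shape)      # (2,)   EM-adjusted prevalence estimate
print(corrected.shape)   # (4, 2) corrected posterior probabilities
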
@@ -515,12 +550,17 @@ class EMQ(AggregativeProbabilisticQuantifier):
             qs_prev_ = qs
             s += 1
 
+            # transformation of intermediate estimates
+            if transform_prior is not None and not converged:
+                qs = transform_prior(qs)
+
         if not converged:
             print('[warning] the method has reached the maximum number of iterations; it might have not converged')
 
         return qs, ps
 
 
 class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
     """
     `Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).
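
For readers who want the loop being patched above in one place, a self-contained sketch of the Saerens et al. (2002) iteration with the new transform_prior hook; it mirrors the diff but simplifies the convergence test, so treat it as illustrative rather than as QuaPy's exact implementation:

import numpy as np

def em_sketch(tr_prev, Px, epsilon=1e-4, transform_prior=None, max_iter=1000):
    Ptr = np.copy(tr_prev)     # training prevalence, fixed throughout
    qs = np.copy(Ptr)          # running prevalence estimate
    qs_prev_, s, converged = None, 0, False
    ps = Px
    while not converged and s < max_iter:
        # E-step: reweight each posterior by the ratio of current to training priors
        ps_unnormalized = (qs / Ptr) * Px
        ps = ps_unnormalized / ps_unnormalized.sum(axis=1, keepdims=True)
        # M-step: the new prevalence estimate is the mean corrected posterior
        qs = ps.mean(axis=0)
        if qs_prev_ is not None and np.abs(qs - qs_prev_).mean() < epsilon:
            converged = True
        qs_prev_ = qs
        s += 1
        # ad-hoc regularization hook added by this commit
        if transform_prior is not None and not converged:
            qs = transform_prior(qs)
    return qs, ps
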