From 74efa9751d8d1f1202f85a3f9cccecd095b7eb17 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 25 Jan 2024 16:43:00 +0100
Subject: [PATCH] adding the approximate solution to ACC and PACC as suggested
 by Mirko Bunse

---
 quapy/CHANGE_LOG.txt        |  7 +++++++
 quapy/method/aggregative.py | 33 ++++++++++++++++++++++++---------
 quapy/model_selection.py    | 17 ++++++++++++++++-
 3 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index 8b2e38f..ced7dd9 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -1,6 +1,13 @@
 Change Log 0.1.8
 ----------------
 
+- Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers tried to solve the system
+    of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution
+    sometimes does not exist. In cases like this, quapy < 0.1.8 resorted to CC for providing a plausible solution.
+    ACC and PACC now resort to an approximate solution in such cases (minimizing the L2-norm of the difference
+    Ax-B) as proposed by Mirko Bunse. A quick experiment reveals this heuristic greatly improves the results
+    of ACC and PACC in T2A@LeQua.
+
 - Fixed ThresholdOptimization methods (X, T50, MAX, MS and MS2). Thanks to Tobias Schumacher and colleagues for pointing
     this out in Appendix A of "Schumacher, T., Strohmaier, M., & Lemmerich, F. (2021). A comparative evaluation of 
     quantification methods. arXiv:2103.03223v3 [cs.LG]"
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index dd277e0..1fb693c 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -349,23 +349,25 @@ class ACC(AggregativeCrispQuantifier):
         Alternatively, this set can be specified at fit time by indicating the exact set of data
         on which the predictions are to be generated.
     :param n_jobs: number of parallel workers
-    :param solver: indicates the method to be used for obtaining the final esimates. The default choice
-        is 'exact', which comes down to solving the system of linear equations `Ax=B` where `A` is a 
+    :param solver: indicates the method to be used for obtaining the final estimates. The choice
+        'exact' comes down to solving the system of linear equations `Ax=B` where `A` is a
         matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in 
         binary) and `B` is the vector of prevalence values estimated via CC, as $x=A^{-1}B$. This solution 
         might not exist for degenerated classifiers, in which case the method defaults to classify and count 
         (i.e., does not attempt any adjustment).
         Another option is to search for the prevalence vector that minimizes the loss |Ax-B|. The latter is
-        achieved by indicating solver='minimize'.
+        achieved by indicating solver='minimize'. This option generally works better and is the default.
     """
 
-    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='exact'):
+    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
         self.classifier = classifier
         self.val_split = val_split
         self.n_jobs = qp._get_njobs(n_jobs)
-        assert solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
         self.solver = solver
 
+    def _check_init_parameters(self):
+        assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
+
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """
         Estimates the misclassification rates.
@@ -408,20 +410,29 @@ class ACC(AggregativeCrispQuantifier):
              'optim_minimize' (minimizes a norm --always exists). 
         :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates
         """
+
         A = PteCondEstim
         B = prevs_estim
+
         if solver == 'exact':
+            # attempts an exact solution of the linear system (may fail)
+
             try:
                 adjusted_prevs = np.linalg.solve(A, B)
                 adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
                 adjusted_prevs /= adjusted_prevs.sum()
             except np.linalg.LinAlgError:
                 adjusted_prevs = prevs_estim  # no way to adjust them!
+
+            return adjusted_prevs
+
         elif solver == 'minimize':
+            # poses the problem as an optimization one, and tries to minimize the norm of the differences
+
             def loss(prev):
-                return np.linalg.norm(A@prev - B)
+                return np.linalg.norm(A @ prev - B)
+
             return F.optim_minimize(loss, n_classes=A.shape[0])
-        return adjusted_prevs
 
 
 class PCC(AggregativeSoftQuantifier):
@@ -462,10 +473,14 @@ class PACC(AggregativeSoftQuantifier):
     :param n_jobs: number of parallel workers
     """
 
-    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
+    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
         self.classifier = classifier
         self.val_split = val_split
         self.n_jobs = qp._get_njobs(n_jobs)
+        self.solver = solver
+
+    def _check_init_parameters(self):
+        assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
 
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """
@@ -479,7 +494,7 @@ class PACC(AggregativeSoftQuantifier):
 
     def aggregate(self, classif_posteriors):
         prevs_estim = self.pcc.aggregate(classif_posteriors)
-        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
+        return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)
 
     @classmethod
     def getPteCondEstim(cls, classes, y, y_):
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 307e7d3..9baace9 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -143,6 +143,21 @@ class GridSearchQ(BaseQuantifier):
         self._print_status(params, score, status, took)
         return model, params, score, status, took
 
+    def _break_down_fit(self):
+        """
+        Decides whether to break down the fit phase in two (classifier-fit followed by aggregation-fit).
+        In order to do so, some conditions should be met: a) the quantifier is of type aggregative,
+        b) the set of hyperparameters can be split into two disjoint non-empty groups.
+
+        :return: True if the conditions are met, False otherwise
+        """
+        if not isinstance(self.model, AggregativeQuantifier):
+            return False
+        cls_configs, q_configs = group_params(self.param_grid)
+        if (len(cls_configs) == 1) or (len(q_configs)==1):
+            return False
+        return True
+
     def _compute_scores_aggregative(self, training):
         # break down the set of hyperparameters into two: classifier-specific, quantifier-specific
         cls_configs, q_configs = group_params(self.param_grid)
@@ -214,7 +229,7 @@ class GridSearchQ(BaseQuantifier):
         self.error_collector = []
 
         self._sout(f'starting model selection with n_jobs={self.n_jobs}')
-        if isinstance(self.model, AggregativeQuantifier):
+        if self._break_down_fit():
             results = self._compute_scores_aggregative(training)
         else:
             results = self._compute_scores_nonaggregative(training)