forked from moreo/QuaPy
adding the approximate solution to ACC and PACC as suggested by Mirko Bunse
This commit is contained in:
@ -1,6 +1,13 @@
Change Log 0.1.8
- Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers tried to solve the system
of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution
sometimes does not exist. In such cases, quapy < 0.1.8 resorted to CC to provide a plausible solution.
ACC and PACC now resort to an approximate solution in such cases (one that minimizes the L2-norm of the difference
Ax-B), as proposed by Mirko Bunse. A quick experiment reveals that this heuristic greatly improves the results
of ACC and PACC in T2A@LeQua.
- Fixed ThresholdOptimization methods (X, T50, MAX, MS and MS2). Thanks to Tobias Schumacher and colleagues for pointing
this out in Appendix A of "Schumacher, T., Strohmaier, M., & Lemmerich, F. (2021). A comparative evaluation of
quantification methods. arXiv:2103.03223v3 [cs.LG]"
@ -349,23 +349,25 @@ class ACC(AggregativeCrispQuantifier):
Alternatively, this set can be specified at fit time by indicating the exact set of data
on which the predictions are to be generated.
:param n_jobs: number of parallel workers
:param solver: indicates the method to be used for obtaining the final esimates. The default choice
is 'exact', which comes down to solving the system of linear equations `Ax=B` where `A` is a
:param solver: indicates the method to be used for obtaining the final estimates. The choice
'exact' comes down to solving the system of linear equations `Ax=B` where `A` is a
matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in
binary) and `B` is the vector of prevalence values estimated via CC, as $x=A^{-1}B$. This solution
might not exist for degenerate classifiers, in which case the method defaults to classify and count
(i.e., does not attempt any adjustment).
Another option is to search for the prevalence vector that minimizes the loss |Ax-B|. The latter is
achieved by indicating solver='minimize'.
achieved by indicating solver='minimize'. This option generally works better and is the default.
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='exact'):
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
self.classifier = classifier
self.val_split = val_split
self.n_jobs = qp._get_njobs(n_jobs)
assert solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
self.solver = solver
def _check_init_parameters(self):
    """
    Checks that `self.solver` holds one of the supported values ('exact' or 'minimize').

    :raises AssertionError: if `self.solver` is neither 'exact' nor 'minimize'
    """
    # NOTE(review): `assert` statements are removed when Python runs with -O, so this check is skipped in that mode
    assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
Estimates the misclassification rates.
@ -408,20 +410,29 @@ class ACC(AggregativeCrispQuantifier):
'optim_minimize' (minimizes a norm --always exists).
:return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates
A = PteCondEstim
B = prevs_estim
if solver == 'exact':
# attempts an exact solution of the linear system (may fail)
adjusted_prevs = np.linalg.solve(A, B)
adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
adjusted_prevs /= adjusted_prevs.sum()
except np.linalg.LinAlgError:
adjusted_prevs = prevs_estim # no way to adjust them!
return adjusted_prevs
elif solver == 'minimize':
# poses the problem as an optimization one, and tries to minimize the norm of the differences
def loss(prev):
return np.linalg.norm(A@prev - B)
return np.linalg.norm(A @ prev - B)
return F.optim_minimize(loss, n_classes=A.shape[0])
return adjusted_prevs
class PCC(AggregativeSoftQuantifier):
@ -462,10 +473,14 @@ class PACC(AggregativeSoftQuantifier):
:param n_jobs: number of parallel workers
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'):
self.classifier = classifier
self.val_split = val_split
self.n_jobs = qp._get_njobs(n_jobs)
self.solver = solver
def _check_init_parameters(self):
    """
    Checks that `self.solver` holds one of the supported values ('exact' or 'minimize').

    :raises AssertionError: if `self.solver` is neither 'exact' nor 'minimize'
    """
    # NOTE(review): `assert` statements are removed when Python runs with -O, so this check is skipped in that mode
    assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'"
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
@ -479,7 +494,7 @@ class PACC(AggregativeSoftQuantifier):
def aggregate(self, classif_posteriors):
prevs_estim = self.pcc.aggregate(classif_posteriors)
return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)
def getPteCondEstim(cls, classes, y, y_):
@ -143,6 +143,21 @@ class GridSearchQ(BaseQuantifier):
self._print_status(params, score, status, took)
return model, params, score, status, took
def _break_down_fit(self):
    """
    Tells whether the fit phase should be broken down in two steps (classifier-fit followed by
    aggregation-fit). The answer is positive only if: a) the model is an instance of
    `AggregativeQuantifier`, and b) neither of the two groups of configurations returned by
    `group_params(self.param_grid)` (classifier-specific and quantifier-specific) contains exactly
    one element.

    :return: True if both conditions are met, False otherwise
    """
    if isinstance(self.model, AggregativeQuantifier):
        # the grid is only inspected for aggregative models
        classifier_grid, quantifier_grid = group_params(self.param_grid)
        return len(classifier_grid) != 1 and len(quantifier_grid) != 1
    return False
def _compute_scores_aggregative(self, training):
# break down the set of hyperparameters into two: classifier-specific, quantifier-specific
cls_configs, q_configs = group_params(self.param_grid)
@ -214,7 +229,7 @@ class GridSearchQ(BaseQuantifier):
self.error_collector = []
self._sout(f'starting model selection with n_jobs={self.n_jobs}')
if isinstance(self.model, AggregativeQuantifier):
if self._break_down_fit():
results = self._compute_scores_aggregative(training)
results = self._compute_scores_nonaggregative(training)
Reference in New Issue