From 4cfb97c165d1da56e4d11fe6f4f385b59a5e7d5c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Sun, 15 Jun 2025 11:59:32 +0200
Subject: [PATCH] merging with office branch

---
 CHANGE_LOG.txt                   |  8 ++++----
 quapy/__init__.py                |  2 +-
 quapy/data/datasets.py           |  5 -----
 quapy/method/_threshold_optim.py |  2 +-
 quapy/method/aggregative.py      | 12 ++++++------
 quapy/method/confidence.py       | 17 +++++++++++++----
 quapy/tests/test_datasets.py     |  1 -
 quapy/tests/test_methods.py      |  2 +-
 8 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 3c43e5d..c3880e7 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -4,7 +4,7 @@ Change Log 0.1.10
 CLEAN TODO-FILE
 
 - Base code Refactor:
-    - Removing coupling between LabelledCollection and quantification methods. E.g.:
+    - Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
         def fit(data:LabelledCollection): -> def fit(X, y):
     - Adding function "predict" (function "quantify" is still present as an alias)
     - Aggregative methods's behavior in terms of fit_classifier and how to treat the val_split is now
@@ -14,13 +14,13 @@ CLEAN TODO-FILE
         in which case the first argument is unused, and this was ambiguous with
         my_acc.fit(the_data, fit_classifier=False)
         in which case the_data is to be used for validation purposes. However, the val_split could be set as a fraction
-        indicating only part of the_data must be used for validation, and the rest wasted... it was confusing.
+        indicating only part of the_data must be used for validation, and the rest wasted... it was certainly confusing.
     - EMQ has been modified, so that the representation function "classify" now only provides posterior probabilities
         and, if required, these are recalibrated (e.g., by "bcts") during the aggregation function.
         A new parameter "on_calib_error" is passed to the constructor, which informs of the policy to follow
-        in case the calibration functions failed. Options include:
+        in case the calibration functions (from the abstention package) fail, which happens sometimes. Options include:
         - 'raise': raises a RuntimeException (default)
-        - 'backup': avoids calibration
+        - 'backup': falls back to using the uncalibrated posteriors
     - Parameter "recalib" has been renamed "calib"
 - Added aggregative bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex, or
     ellipses in the CLR space). This method is efficient as it leverages the two-phases of the aggregative quantifiers.
diff --git a/quapy/__init__.py b/quapy/__init__.py
index 300e7d3..90f7a70 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -14,7 +14,7 @@ from . import model_selection
 from . import classification
 import os
 
-__version__ = '0.1.10r'
+__version__ = '0.2.0'
 
 environ = {
     'SAMPLE_SIZE': None,
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 5582a58..b7fd81a 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -548,25 +548,20 @@ def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, standardize=
     """
     if name == "acute.a":
         X, y = data["X"], data["y"][:, 0]
-        # X, y = Xy[:, :-2], Xy[:, -2]
     elif name == "acute.b":
         X, y = data["X"], data["y"][:, 1]
-        # X, y = Xy[:, :-2], Xy[:, -1]
     elif name == "wine-q-red":
         X, y, color = data["X"], data["y"], data["color"]
-        # X, y, color = Xy[:, :-2], Xy[:, -2], Xy[:, -1]
         red_idx = color == "red"
         X, y = X[red_idx, :], y[red_idx]
         y = (y > 5).astype(int)
     elif name == "wine-q-white":
         X, y, color = data["X"], data["y"], data["color"]
-        # X, y, color = Xy[:, :-2], Xy[:, -2], Xy[:, -1]
         white_idx = color == "white"
         X, y = X[white_idx, :], y[white_idx]
         y = (y > 5).astype(int)
     else:
         X, y = data["X"], data["y"]
-        # X, y = Xy[:, :-1], Xy[:, -1]
 
     y = binarize(y, pos_class=pos_class[name])
 
diff --git a/quapy/method/_threshold_optim.py b/quapy/method/_threshold_optim.py
index 2c3a68c..628f01a 100644
--- a/quapy/method/_threshold_optim.py
+++ b/quapy/method/_threshold_optim.py
@@ -34,7 +34,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
     """
 
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=None, n_jobs=None):
-        super.__init__(classifier, fit_classifier, val_split)
+        super().__init__(classifier, fit_classifier, val_split)
         self.n_jobs = qp._get_njobs(n_jobs)
 
     @abstractmethod
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index cda6294..0be9fb1 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -717,7 +717,7 @@ class EMQ(AggregativeSoftQuantifier):
         super().__init__(classifier, fit_classifier, val_split)
         self.exact_train_prev = exact_train_prev
         self.calib = calib
-        self.on_calib_errors = on_calib_error
+        self.on_calib_error = on_calib_error
         self.n_jobs = n_jobs
 
     @classmethod
@@ -790,9 +790,9 @@ class EMQ(AggregativeSoftQuantifier):
             try:
                 self.calibration_function = calibrator(P, np.eye(n_classes)[y], posterior_supplied=True)
             except Exception as e:
-                if self.on_calib_errors == 'raise':
+                if self.on_calib_error == 'raise':
                     raise RuntimeError(f'calibration {self.calib} failed at fit time: {e}')
-                elif self.on_calib_errors == 'backup':
+                elif self.on_calib_error == 'backup':
                     self.calibration_function = lambda P: P
 
     def _calibrate_if_requested(self, uncalib_posteriors):
@@ -800,12 +800,12 @@
             try:
                 calib_posteriors = self.calibration_function(uncalib_posteriors)
             except Exception as e:
-                if self.on_calib_errors == 'raise':
+                if self.on_calib_error == 'raise':
                     raise RuntimeError(f'calibration {self.calib} failed at predict time: {e}')
-                elif self.on_calib_errors == 'backup':
+                elif self.on_calib_error == 'backup':
                     calib_posteriors = uncalib_posteriors
                 else:
-                    raise ValueError(f'unexpected {self.on_calib_errors=}; '
+                    raise ValueError(f'unexpected {self.on_calib_error=}; '
                                      f'valid options are {EMQ.ON_CALIB_ERROR_VALUES}')
             return calib_posteriors
         return uncalib_posteriors
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 77660f1..f68f956 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -450,8 +450,17 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
 
     :param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken
         to be the one indicated in `qp.environ['DEFAULT_CLS']`
-    :param val_split: a float in (0, 1) indicating the proportion of the training data to be used,
-        as a stratified held-out validation set, for generating classifier predictions.
+    :param fit_classifier: whether to train the learner (default is True). Set to False if the
+        learner has been trained outside the quantifier.
+    :param val_split: specifies the data used for generating classifier predictions. This specification
+        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+        be extracted from the training set; or as an integer (default 5), indicating that the predictions
+        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+        for `k`); or as a tuple (X,y) defining the specific set of data to use for validation.
+        The default value is 5, meaning that the classifier predictions are generated via 5-fold
+        cross-validation on the training data. Note that this quantifier applies no calibration, so
+        val_split is used exclusively for generating the classifier predictions on which the Bayesian
+        aggregation is computed.
     :param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
     :param num_samples: number of samples to draw from the posterior (default 1000)
     :param mcmc_seed: random seed for the MCMC sampler (default 0)
@@ -462,6 +471,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
     """
     def __init__(self,
                  classifier: BaseEstimator=None,
+                 fit_classifier=True,
                  val_split: int = 5,
                  num_warmup: int = 500,
                  num_samples: int = 1_000,
                  mcmc_seed: int = 0,
@@ -480,8 +490,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
 
         if _bayesian.DEPENDENCIES_INSTALLED is False:
             raise ImportError("Auxiliary dependencies are required. Run `$ pip install quapy[bayes]` to install them.")
-        self.classifier = qp._get_classifier(classifier)
-        self.val_split = val_split
+        super().__init__(classifier, fit_classifier, val_split)
         self.num_warmup = num_warmup
         self.num_samples = num_samples
         self.mcmc_seed = mcmc_seed
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index 63c6ef8..a1910d5 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -106,7 +106,6 @@ class TestDatasets(unittest.TestCase):
         self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
         self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)
 
-
     def test_IFCB(self):
         if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'):
            print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set")
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index aa609bc..c2931b9 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -64,7 +64,7 @@ class TestMethods(unittest.TestCase):
 
             q = model()
             print(f'testing {q} on dataset {dataset.name}')
-            q.fit(dataset.training)
+            q.fit(*dataset.training.Xy)
             estim_prevalences = q.predict(dataset.test.X)
 
             self.assertTrue(check_prevalence_vector(estim_prevalences))
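
Usage sketch (not part of the patch): the snippet below illustrates the fit(X, y) / predict(X) interface that the CHANGE_LOG entries and the updated test_methods.py above describe. It is a minimal sketch, not the project's documented example: the synthetic data, the LogisticRegression classifier, and the commented keyword values are illustrative assumptions; only the parameter names (classifier, calib, on_calib_error) and the fit/predict calls are taken from the diff itself.

# Minimal sketch of the refactored API (assumes quapy 0.2.0 and scikit-learn are installed).
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from quapy.method.aggregative import EMQ

# synthetic binary data standing in for a real quantification dataset (illustrative only)
X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.5, stratify=y, random_state=0)

# EMQ per this patch: "recalib" is now "calib", and "on_calib_error" controls what happens if the
# (optional) calibration fails ('raise' re-raises, 'backup' falls back to uncalibrated posteriors);
# e.g., EMQ(classifier=..., calib='bcts', on_calib_error='backup')
quantifier = EMQ(classifier=LogisticRegression())
quantifier.fit(Xtr, ytr)              # new interface: fit(X, y) replaces fit(LabelledCollection)
prevalence = quantifier.predict(Xte)  # "predict" replaces "quantify" (still available as an alias)
print(prevalence)                     # estimated class-prevalence vector for the test sample

Per the CHANGE_LOG, the same fit(X, y) / predict(X) pattern should apply to the other quantifiers touched by this patch (e.g., the ThresholdOptimization subclasses and BayesianCC).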