broken submodule fix

Alejandro Moreo Fernandez 2026-02-12 13:14:59 +01:00
parent 81472b9d25
commit bb5763da76
17 changed files with 969 additions and 210 deletions

.gitmodules vendored Normal file (3 additions)
View File

@@ -0,0 +1,3 @@
[submodule "result_table"]
path = result_table
url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git

View File

@@ -1,4 +1,5 @@
- Things to try:
- Why not optimize the calibration of the classifier, instead of the classifier as a component of the quantifier?
- init chain helps? [seems irrelevant in MAPLS...]
- Aitchison kernel is better?
- other classifiers?

View File

@@ -101,6 +101,13 @@ class KDEyCLR(KDEyML):
random_state=random_state, kernel='aitchison'
)
class KDEyCLR2(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='aitchison'
)
class KDEyILR(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):

View File

@@ -180,7 +180,7 @@ class VisualDataHandler(DatasetHandler):
@classmethod
def get_datasets(cls):
- datasets = ['cifar10', 'mnist', 'cifar100coarse', 'fashionmnist', 'svhn'] #+ ['cifar100']
+ datasets = ['cifar100coarse', 'cifar10', 'mnist', 'fashionmnist', 'svhn'] #+ ['cifar100']
# datasets_feat = [f'{d}-f' for d in datasets]
datasets_feat = [f'{d}-l' for d in datasets]
return datasets_feat # + datasets
@@ -295,7 +295,7 @@ class UCIDatasetHandler(DatasetHandler, ABC):
class UCIMulticlassHandler(UCIDatasetHandler):
- DATASETS = [d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])]
+ DATASETS = sorted([d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])])
def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=1000, random_state=0):
super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state)

View File

@@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy.commons import KDEyReduce
from BayesianKDEy.methods import get_experimental_methods, MethodDescriptor
from _bayeisan_kdey import BayesianKDEy
from _bayesian_mapls import BayesianMAPLS
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
@@ -24,7 +25,7 @@ from time import time
- def methods(data_handler: DatasetHandler):
+ def methods___depr():
"""
Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
- name: is a str representing the name of the method (e.g., 'BayesianKDEy')
@@ -33,26 +34,23 @@ def methods(data_handler: DatasetHandler):
- bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the
quantifier with optimized hyperparameters
"""
if False: # isinstance(data_handler, VisualDataHandler):
Cls = MockClassifierFromPosteriors
cls_hyper = {}
val_split = data_handler.get_validation().Xy # use this specific collection
pass
else:
Cls = LogisticRegression
cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
val_split = 5 # k-fold cross-validation
cc_hyper = cls_hyper
acc_hyper = cls_hyper
# emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper}
hdy_hyper = {'nbins': [3,4,5,8,16,32], **cls_hyper}
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
band = {'bandwidth': np.logspace(-3, -1, 10)}
multiclass_method = 'multiclass'
only_binary = 'only_binary'
only_multiclass = 'only_multiclass'
# surrogate quantifiers
cc = CC(Cls())
acc = ACC(Cls(), val_split=val_split)
hdy = DMy(Cls(), val_split=val_split)
kde_gau = KDEyML(Cls(), val_split=val_split)
@@ -65,22 +63,26 @@ def methods(data_handler: DatasetHandler):
# Bootstrap approaches:
# --------------------------------------------------------------------------------------------------------
# yield 'BootstrapCC', cc, cc_hyper, lambda hyper: AggregativeBootstrap(CC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
- # Bayesian approaches:
+ # Bayesian approaches: (*=temp calibration auto threshold and coverage sim to nominal; +=temp calibration w/o amplitude coverage, for winkler criterion, !=same but alpha=0.005 for winkler)
# --------------------------------------------------------------------------------------------------------
# yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
# yield 'BayesianACC*', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
# yield 'BayesianACC+', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
# yield 'BayesianACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
#yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
# yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
- yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+ #yield f'BaKDE-Gau-pca5', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
- yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+ #yield f'BaKDE-Gau-pca5*', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
- yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+ #yield f'BaKDE-Gau-pca10', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
- yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+ #yield f'BaKDE-Gau-pca10*', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
# yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
@@ -89,19 +91,21 @@ def methods(data_handler: DatasetHandler):
# engine='numpyro',
# **hyper), multiclass_method
# yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
# yield f'BaKDE-Ait-T!', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
#yield f'BaKDE-Gau-T!', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
# yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
# yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
# yield 'BayEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
# yield 'BaEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
# yield 'BaACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), temperature=None, mcmc_seed=0), multiclass_method
# yield 'BaEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=None, exact_train_prev=False), multiclass_method
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
with qp.util.temp_seed(0):
- if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
+ point_quantifier = cp(point_quantifier)
def scale_bandwidth(bandwidth, n_classes, beta=0.5):
return bandwidth * np.power(n_classes, beta)
n = dataset.get_training().n_classes
grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
print('bandwidth scaled')
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
# model selection
if len(grid)>0:
@@ -127,30 +131,26 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
if uncertainty_quantifier.temperature is None:
print('calibrating temperature')
train, val_prot = dataset.get_train_valprot_for_modsel()
- if dataset.name.startswith('LeQua'):
+ temp_grid=[1., .5, 1.5, 2., 5., 10., 100., 1000.]
- temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
+ temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=1., criterion='winkler')
else:
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
uncertainty_quantifier.temperature = temperature
else:
temperature = uncertainty_quantifier.temperature
return temperature
def experiment(dataset: DatasetHandler, method: MethodDescriptor, hyper_choice_path: Path):
def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):
with qp.util.temp_seed(0):
# model selection
best_hyperparams = qp.util.pickled_resource(
- hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
+ hyper_choice_path, model_selection, dataset, method.surrogate_quantifier(), method.hyper_parameters
)
print(f'{best_hyperparams=}')
t_init = time()
- uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams)
+ uncertainty_quantifier = method.uncertainty_aware_quantifier(best_hyperparams)
temperature = temperature_calibration(dataset, uncertainty_quantifier)
training, test_generator = dataset.get_train_testprot_for_eval()
uncertainty_quantifier.fit(*training.Xy)
@@ -176,7 +176,13 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier,
results['test-time'].append(ttime)
results['samples'].append(region.samples)
- pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
+ pbar.set_description(
f'{method.name} '
f'MAE={np.mean(results["ae"]):.5f} '
f'W={np.mean(results["sre"]):.5f} '
f'Cov={np.mean(results["coverage"]):.5f} '
f'AMP={np.mean(results["amplitude"]):.5f}'
)
report = {
'optim_hyper': best_hyperparams,
@@ -189,40 +195,31 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier,
return report
def check_skip_experiment(method_scope, dataset: DatasetHandler):
if method_scope == 'only_binary' and not dataset.is_binary():
return True
if method_scope == 'only_multiclass' and dataset.is_binary():
return True
return False
if __name__ == '__main__':
result_dir = RESULT_DIR
- for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
+ for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
for dataset in data_handler.iter():
qp.environ['SAMPLE_SIZE'] = dataset.sample_size
print(f'dataset={dataset.name}')
#if dataset.name != 'abalone':
# continue
problem_type = 'binary' if dataset.is_binary() else 'multiclass'
- for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset):
+ for method in get_experimental_methods():
- if check_skip_experiment(method_scope, dataset):
+ # skip combination?
if method.binary_only() and not dataset.is_binary():
continue
- result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
+ result_path = experiment_path(result_dir / problem_type, dataset.name, method.name)
- hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_quant.__class__.__name__)
+ hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method.surrogate_quantifier_name())
report = qp.util.pickled_resource(
- result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
+ result_path, experiment, dataset, method, hyper_path
)
print(f'dataset={dataset.name}, '
- f'method={method_name}: '
+ f'method={method.name}: '
f'mae={report["results"]["ae"].mean():.5f}, '
f'W={report["results"]["sre"].mean():.5f}, '
f'coverage={report["results"]["coverage"].mean():.5f}, '

View File

@@ -1,6 +1,7 @@
import pickle
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed
from tqdm import tqdm
import pandas as pd
@@ -9,10 +10,16 @@ from pathlib import Path
import quapy as qp
from BayesianKDEy.commons import RESULT_DIR
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
from comparison_group import SelectGreaterThan, SelectByName, SelectSmallerThan
from format import FormatModifierSelectColor
from quapy.error import dist_aitchison
- from quapy.method.confidence import ConfidenceIntervals
+ from quapy.method.confidence import ConfidenceIntervals, ConfidenceIntervalsILR, ConfidenceIntervalsCLR
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
import quapy.functional as F
from result_path.src.table import LatexTable
import numpy as np
import pandas as pd
from itertools import chain
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
@@ -23,15 +30,12 @@ pd.set_option("display.float_format", "{:.4f}".format)
# methods = None # show all methods
- methods = ['BayesianACC',
+ methods = ['BoCC',
- 'BaKDE-Ait-numpyro',
+ 'BaACC!',
- 'BaKDE-Ait-T*',
+ 'BaEMQ!',
- 'BaKDE-Gau-numpyro',
+ 'BaKDE-Gau-T!',
- 'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
+ 'BaKDE-Ait-T!',
- # 'BayEMQ-U-Temp1-2',
+ 'BaKDE-Ait-T!2'
# 'BayEMQ-T*',
'BayEMQ',
'BayEMQ*',
#'BootstrapACC',
#'BootstrapHDy',
#'BootstrapKDEy',
@@ -55,11 +59,15 @@ def compute_coverage_amplitude(region_constructor, **kwargs):
def process_one(samples, true_prevs):
region = region_constructor(samples, **kwargs)
- if isinstance(region, ConfidenceIntervals):
+ if isinstance(region, ConfidenceIntervals) or isinstance(region, ConfidenceIntervalsCLR) or isinstance(region, ConfidenceIntervalsILR):
winkler = region.mean_winkler_score(true_prevs)
# winkler_e = region.mean_winkler_score(true_prevs, add_ae=True)
cov_soft = region.coverage_soft(true_prevs)
else:
winkler = None
- return region.coverage(true_prevs), region.montecarlo_proportion(), winkler
+ # winkler_e = None
cov_soft = None
return region.coverage(true_prevs), region.montecarlo_proportion(), winkler, cov_soft
out = Parallel(n_jobs=3)(
delayed(process_one)(samples, true_prevs)
@@ -71,8 +79,8 @@ def compute_coverage_amplitude(region_constructor, **kwargs):
)
# unzip results
- coverage, amplitude, winkler = zip(*out)
+ coverage, amplitude, winkler, cov_soft = zip(*out)
- return list(coverage), list(amplitude), list(winkler)
+ return list(coverage), list(amplitude), list(winkler), list(cov_soft)
def update_pickle(report, pickle_path, updated_dict:dict):
@@ -83,29 +91,145 @@ def update_pickle(report, pickle_path, updated_dict:dict):
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
if f'coverage-{conf_name}' not in report:
- covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
+ covs, amps, winkler, cov_soft = compute_coverage_amplitude(conf_region_class, **kwargs)
# amperr (lower is better) counts the amplitude when the true value was covered, or 1 (max amplitude) otherwise
amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)]
update_fields = {
f'coverage-{conf_name}': covs,
f'amplitude-{conf_name}': amps,
f'winkler-{conf_name}': winkler,
- f'amperr-{conf_name}': amperrs,
+ f'coverage-soft-{conf_name}': cov_soft
}
update_pickle(report, file, update_fields)
def pareto_front(df, x_col, y_col, maximize_y=True, minimize_x=True):
"""
Returns a boolean mask indicating whether each row is Pareto-optimal.
"""
X = df[x_col].values
Y = df[y_col].values
- def nicer(name:str):
+ is_pareto = np.ones(len(df), dtype=bool)
for i in range(len(df)):
if not is_pareto[i]:
continue
for j in range(len(df)):
if i == j:
continue
better_or_equal_x = X[j] <= X[i] if minimize_x else X[j] >= X[i]
better_or_equal_y = Y[j] >= Y[i] if maximize_y else Y[j] <= Y[i]
strictly_better = (
(X[j] < X[i] if minimize_x else X[j] > X[i]) or
(Y[j] > Y[i] if maximize_y else Y[j] < Y[i])
)
if better_or_equal_x and better_or_equal_y and strictly_better:
is_pareto[i] = False
break
return is_pareto
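A quick usage sketch of pareto_front (the toy frame and values below are purely illustrative, not taken from the experiments): amplitude is to be minimized and coverage maximized, so a row survives only if no other row is at least as good on both axes and strictly better on at least one.

toy = pd.DataFrame({'method': ['A', 'B', 'C'],
                    'a-CI': [0.10, 0.20, 0.15],
                    'c-CI': [0.90, 0.95, 0.90]})
mask = pareto_front(toy, x_col='a-CI', y_col='c-CI')  # defaults: minimize x, maximize y
print(toy[mask])  # keeps A and B; C is dominated by A (same coverage, larger amplitude)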
def plot_coverage_vs_amplitude(
df,
coverage_col,
amplitude_col="a-CI",
method_col="method",
dataset_col=None,
error_col=None,
error_threshold=None,
nominal_coverage=0.95,
title=None,
):
df_plot = df.copy()
# Optional error filtering
if error_col is not None and error_threshold is not None:
df_plot = df_plot[df_plot[error_col] <= error_threshold]
# Compute Pareto front
pareto_mask = pareto_front(
df_plot,
x_col=amplitude_col,
y_col=coverage_col,
maximize_y=True,
minimize_x=True
)
plt.figure(figsize=(7, 6))
# Base scatter
sns.scatterplot(
data=df_plot,
x=amplitude_col,
y=coverage_col,
hue=method_col,
# style=dataset_col,
alpha=0.6,
s=60,
legend=True
)
# Highlight Pareto front
plt.scatter(
df_plot.loc[pareto_mask, amplitude_col],
df_plot.loc[pareto_mask, coverage_col],
facecolors='none',
edgecolors='black',
s=120,
linewidths=1.5,
label="Pareto front"
)
# Nominal coverage line
plt.axhline(
nominal_coverage,
linestyle="--",
color="gray",
linewidth=1,
label="Nominal coverage"
)
plt.xlabel("Amplitude (fraction of simplex)")
plt.ylabel("Coverage")
plt.ylim(0, 1.05)
if title is not None:
plt.title(title)
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
plt.tight_layout()
plt.show()
def nicer_method(name:str):
replacements = {
- 'Bayesian': 'Ba',
+ # 'Bayesian': 'Ba',
'Bootstrap': 'Bo',
'-numpyro': '',
'emcee': 'emc',
- '-T*': '*'
+ '-T*': '*',
'-T!': '',
'!': '',
'-Ait': r'$^{(\mathrm{Ait})}$',
'-Gau': r'$^{(\mathrm{Gau})}$'
}
for k, v in replacements.items():
name = name.replace(k,v)
return name
def nicer_data(name:str):
replacements = {
'cifar': 'CIFAR',
'-l': '',
'mnist': 'MNIST',
'fashionmnist': 'fashionMNIST',
'svhn': 'SVHN',
'100coarse': '100(20)',
} }
for k, v in replacements.items():
name = name.replace(k, v)
@@ -119,61 +243,55 @@ n_classes = {}
tr_size = {}
tr_prev = {}
- for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
+ dataset_class = [UCIMulticlassHandler, CIFAR100Handler, VisualDataHandler, LeQuaHandler]
- problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
+ dataset_order = []
for handler in dataset_class:
for dataset in handler.iter():
dataset_order.append(dataset.name)
train = dataset.get_training()
n_classes[dataset.name] = train.n_classes
tr_size[dataset.name] = len(train)
tr_prev[dataset.name] = F.strprev(train.prevalence())
problem_type = 'multiclass'
path = f'./{base_dir}/{problem_type}/*.pkl'
for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
file = Path(file)
dataset, method = file.name.replace('.pkl', '').split('__')
- if (method not in methods) or (dataset not in dataset_handler.get_datasets()):
+ if (method not in methods) or (dataset not in dataset_order):
continue
report = pickle.load(open(file, 'rb'))
results = report['results']
n_samples = len(results['ae'])
- table['method'].extend([nicer(method)] * n_samples)
+ table['method'].extend([nicer_method(method)] * n_samples)
- table['dataset'].extend([dataset] * n_samples)
+ table['dataset'].extend([nicer_data(dataset)] * n_samples)
table['ae'].extend(results['ae'])
table['rae'].extend(results['rae'])
# table['c-CI'].extend(results['coverage'])
# table['a-CI'].extend(results['amplitude'])
# update_pickle_with_region(report, file, conf_name='CI-ILR', conf_region_class=ConfidenceIntervalsILR, bonferroni_correction=True)
# update_pickle_with_region(report, file, conf_name='CI-CLR', conf_region_class=ConfidenceIntervalsCLR, bonferroni_correction=True)
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
update_pickle_with_region(report, file, conf_name='CInb', conf_region_class=ConfidenceIntervals, bonferroni_correction=False) # no Bonferroni-correction
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
- table['c-CI'].extend(report['coverage-CI'])
+ conf_bonferroni = 'CI'
- table['a-CI'].extend(report['amplitude-CI'])
+ conf_name='CInb'
- table['w-CI'].extend(report['winkler-CI'])
+ table['c-CI'].extend(report[f'coverage-{conf_bonferroni}']) # the true coverage is better measured with Bonferroni-correction
- table['amperr-CI'].extend(report['amperr-CI'])
+ table['w-CI'].extend(report[f'winkler-{conf_name}'])
table['cs-CI'].extend(report[f'coverage-soft-{conf_name}'])
table['a-CI'].extend(report[f'amplitude-{conf_name}'])
- # table['c-CE'].extend(report['coverage-CE'])
+ # table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) # not in this paper...
# table['a-CE'].extend(report['amplitude-CE'])
# table['amperr-CE'].extend(report['amperr-CE'])
# table['c-CLR'].extend(report['coverage-CLR'])
# table['a-CLR'].extend(report['amplitude-CLR'])
# table['amperr-CLR'].extend(report['amperr-CLR'])
# table['c-ILR'].extend(report['coverage-ILR'])
# table['a-ILR'].extend(report['amplitude-ILR'])
# table['amperr-ILR'].extend(report['amperr-ILR'])
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
# table['aitch'].extend()
# table['reg-score-ILR'].extend(
# [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
# )
for dataset in dataset_handler.iter():
train = dataset.get_training()
n_classes[dataset.name] = train.n_classes
tr_size[dataset.name] = len(train)
tr_prev[dataset.name] = F.strprev(train.prevalence())
# remove datasets with more than max_classes classes
# max_classes = 25
@@ -190,19 +308,100 @@ for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, C
# df = df[df["dataset"] != data_name]
df = pd.DataFrame(table)
df['a-CI'] *= 100
df['c-CI'] *= 100
df['cs-CI'] *= 100
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
if problem_type == 'binary' and region=='ILR':
continue
- for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
+ for column in [f'a-{region}', 'ae', 'SRE', f'c-{region}', f'cs-{region}']: # f'w-{region}'
pv = pd.pivot_table(
df, index='dataset', columns='method', values=column, margins=True
)
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
#pv['tr-prev'] = pv.index.map(tr_prev)
- pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
+ pv = pv.drop(columns=[col for col in pv.columns if col == "All" or col[-1]=='All'])
print(f'{problem_type=} {column=}')
print(pv)
print('-'*80)
latex = LatexTable.from_dataframe(df, method='method', benchmark='dataset', value=column, name=column)
latex.format.configuration.show_std = False
#latex.reorder_methods([nicer_method(m) for m in methods])
latex.reorder_benchmarks([nicer_data(d) for d in dataset_order])
if column in ['ae', 'SRE']:
latex.format.configuration.lower_is_better = True
latex.format.configuration.stat_test = 'wilcoxon'
#latex.format.configuration.stat_test = None
# latex.format.configuration.show_std = True
if column in [f'c-{region}', f'cs-{region}']:
latex.format.configuration.lower_is_better = False
latex.format.configuration.stat_test = None
latex.format.configuration.with_color = False
latex.format.configuration.best_in_bold = False
latex.format.configuration.with_rank = False
latex.format.configuration.mean_prec = 0
latex.add_format_modifier(
format_modifier=FormatModifierSelectColor(
comparison=SelectGreaterThan(reference_selector=89, input_selector=SelectByName())
)
)
if column in [f'a-{region}']:
latex.format.configuration.lower_is_better = True
latex.format.configuration.stat_test = None
latex.format.configuration.with_color = False
latex.format.configuration.best_in_bold = False
latex.format.configuration.mean_prec = 2
latex.add_format_modifier(
format_modifier=FormatModifierSelectColor(
comparison=SelectSmallerThan(reference_selector=11, input_selector=SelectByName())
)
)
# latex.add_format_modifier(
# format_modifier=FormatModifierSelectColor(
# comparison=SelectSmallerThan(reference_selector=0.01, input_selector=SelectByName()),
# intensity=50
# )
# )
latex.format.configuration.resizebox=.5
latex.latexPDF(pdf_path=f'./tables/{latex.name}.pdf')
df = df[df['method']!='BaACC']
df = df[df['method']!='BaACC*']
df = df[df['method']!='BaACC+']
df = df[df['method']!='BaKDE-Ait*']
df = df[df['method']!='BaKDE-Gau*']
df = df[df['method']!='BayEMQ*']
grouped = df.groupby(["method", "dataset"])
agg = grouped.agg(
ae_mean=("ae", "mean"),
ae_std=("ae", "std"),
sre_mean=("SRE", "mean"),
sre_std=("SRE", "std"),
coverage_mean=("c-CI", "mean"),
coverage_std=("c-CI", "std"),
coverage_soft_mean=("cs-CI", "mean"),
amplitude_mean=("a-CI", "mean"),
amplitude_std=("a-CI", "std"),
).reset_index()
#plot_coverage_vs_amplitude(
# agg,
# coverage_col="coverage_soft_mean",
# amplitude_col="amplitude_mean",
# method_col="method",
# dataset_col="dataset",
# nominal_coverage=0.95,
# title="Marginal coverage vs amplitude"
#)
#print('RESTORE THE WILCOXON')

View File

@@ -0,0 +1,169 @@
import os.path
from pathlib import Path
import pandas as pd
from sklearn.linear_model import LogisticRegression
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy.commons import KDEyReduce
from _bayeisan_kdey import BayesianKDEy
from _bayesian_mapls import BayesianMAPLS
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
# import datasets
from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
from method.confidence import ConfidenceIntervals
from temperature_calibration import temp_calibration
from build.lib.quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
from quapy.method.confidence import BayesianCC, AggregativeBootstrap
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
import numpy as np
from tqdm import tqdm
from collections import defaultdict
from time import time
def methods(data_handler: DatasetHandler):
"""
Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
- name: is a str representing the name of the method (e.g., 'BayesianKDEy')
- quantifier: is the base model (e.g., KDEyML())
- hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
- bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the
quantifier with optimized hyperparameters
"""
if isinstance(data_handler, VisualDataHandler):
Cls = LogisticRegression
cls_hyper = {}
else:
Cls = LogisticRegression
cls_hyper = {'classifier__C': np.logspace(-4, 4, 9), 'classifier__class_weight': ['balanced', None]}
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
kdey_hyper_larger = {'bandwidth': np.logspace(-1, 0, 10), **cls_hyper}
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
# surrogate quantifiers
kde_gau_scale = KDEyScaledB(Cls())
yield 'KDEy-G-exp', kdey_hyper, KDEyML(Cls())
# yield 'KDEy-G-exp2', kdey_hyper_larger, KDEyML(Cls())
# yield 'KDEy-G-log', kdey_hyper, KDEyML(Cls(), logdensities=True)
yield 'KDEy-Ait', kdey_hyper_clr, KDEyCLR(Cls())
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
with qp.util.temp_seed(0):
if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
def scale_bandwidth(bandwidth, n_classes, beta=0.5):
return bandwidth * np.power(n_classes, beta)
n = dataset.get_training().n_classes
grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
print('bandwidth scaled')
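# e.g., with the default beta=0.5 and a (hypothetical) 9-class dataset, each candidate
# bandwidth h in the grid is rescaled to h * 9**0.5, so 0.1 becomes 0.3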
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
# model selection
if len(grid) > 0:
train, val_prot = dataset.get_train_valprot_for_modsel()
mod_sel = GridSearchQ(
model=point_quantifier,
param_grid=grid,
protocol=val_prot,
refit=False,
n_jobs=-1,
verbose=True
).fit(*train.Xy)
best_params = mod_sel.best_params_
else:
best_params = {}
return best_params
def experiment(dataset: DatasetHandler,
point_quantifier: AggregativeQuantifier,
method_name: str,
grid: dict,
hyper_choice_path: Path):
with qp.util.temp_seed(0):
# model selection
best_hyperparams = qp.util.pickled_resource(
hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
)
print(f'{best_hyperparams=}')
t_init = time()
training, test_generator = dataset.get_train_testprot_for_eval()
point_quantifier.fit(*training.Xy)
tr_time = time() - t_init
# test
train_prevalence = training.prevalence()
results = defaultdict(list)
pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
for i, (sample_X, true_prevalence) in pbar:
t_init = time()
point_estimate = point_quantifier.predict(sample_X)
ttime = time() - t_init
results['true-prevs'].append(true_prevalence)
results['point-estim'].append(point_estimate)
results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence))
results['test-time'].append(ttime)
pbar.set_description(
f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f}')
report = {
'optim_hyper': best_hyperparams,
'train_time': tr_time,
'train-prev': train_prevalence,
'results': {k: np.asarray(v) for k, v in results.items()},
}
return report
if __name__ == '__main__':
result_dir = Path('results_map')
reports = defaultdict(list)
for data_handler in [UCIMulticlassHandler]: # , UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
for dataset in data_handler.iter():
qp.environ['SAMPLE_SIZE'] = dataset.sample_size
# print(f'dataset={dataset.name}')
problem_type = 'binary' if dataset.is_binary() else 'multiclass'
for method_name, hyper_params, quantifier in methods(dataset):
result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method_name)
# if os.path.exists(result_path):
report = qp.util.pickled_resource(
result_path, experiment, dataset, quantifier, method_name, hyper_params, hyper_path
)
reports['dataset'].append(dataset.name)
reports['method'].append(method_name)
reports['MAE'].append(report["results"]["ae"].mean())
reports['SRE'].append(report["results"]["sre"].mean())
reports['h'].append(report["optim_hyper"]["bandwidth"])
print(f'dataset={dataset.name}, '
f'method={method_name}: '
f'mae={reports["MAE"][-1]:.5f}, '
f'W={reports["SRE"][-1]:.5f} '
f'h={reports["h"][-1]}')
pv = pd.DataFrame(reports).pivot_table(values=['MAE', 'SRE', 'h'], index='dataset', columns='method', margins=True)
print(pv)

BayesianKDEy/methods.py Normal file (168 additions)
View File

@@ -0,0 +1,168 @@
from abc import ABC, abstractmethod
import numpy as np
from sklearn.linear_model import LogisticRegression
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy._bayesian_mapls import BayesianMAPLS
from BayesianKDEy.commons import KDEyCLR, KDEyCLR2
from quapy.method.aggregative import CC, ACC, EMQ, DMy, KDEyML
from quapy.method.base import BaseQuantifier
from quapy.method.confidence import AggregativeBootstrap, BayesianCC
def get_experimental_methods():
#yield BootsCC()
#yield BayACC()
#yield BayEMQ()
#yield BayKDEyGau()
#yield BayKDEyAit()
yield BayKDEyAit2()
# commons
# ------------------------------------------------------------
Cls = LogisticRegression
cls_hyper = {
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}
hdy_hyper = {'nbins': [3, 4, 5, 8, 9, 10, 12, 14, 16, 32], **cls_hyper}
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
def hyper2cls(hyperparams):
return {k.replace('classifier__', ''): v for k, v in hyperparams.items()}
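# e.g. (illustrative values): hyper2cls({'classifier__C': 1.0, 'classifier__class_weight': None})
# returns {'C': 1.0, 'class_weight': None}, i.e., keys the classifier constructor accepts directly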
# method descriptor logic
# ------------------------------------------------------------
class MethodDescriptor(ABC):
def __init__(self,
name: str,
surrogate_quantifier: BaseQuantifier,
hyper_parameters: dict,
):
self.name = name
self.surrogate_quantifier_ = surrogate_quantifier
self.hyper_parameters = hyper_parameters
@abstractmethod
def binary_only(self): ...
def surrogate_quantifier(self):
return self.surrogate_quantifier_
def surrogate_quantifier_name(self):
return self.surrogate_quantifier_.__class__.__name__
@abstractmethod
def uncertainty_aware_quantifier(self, hyperparameters): ...
class MulticlassMethodDescriptor(MethodDescriptor):
def binary_only(self):
return False
# specific methods definitions
# ------------------------------------------------------------
# ------------------------------------------------------------
# Bootstrap approaches:
# ------------------------------------------------------------
class BootsCC(MulticlassMethodDescriptor):
def __init__(self):
super().__init__(name='BoCC', surrogate_quantifier=CC(Cls()), hyper_parameters=cls_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
quantifier = CC(Cls()).set_params(**hyperparameters)
return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
# class BootsACC(MulticlassMethodDescriptor):
# def __init__(self):
# super().__init__(name='BoACC', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper)
#
# def uncertainty_aware_quantifier(self, hyperparameters):
# quantifier = ACC(Cls()).set_params(**hyperparameters)
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
#
#
# class BootsEMQ(MulticlassMethodDescriptor):
# def __init__(self):
# super().__init__(name='BoEMQ', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper)
#
# def uncertainty_aware_quantifier(self, hyperparameters):
# quantifier = EMQ(Cls(), exact_train_prev=False).set_params(**hyperparameters)
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
# class BootsHDy(MethodDescriptor):
# def __init__(self):
# super().__init__(name='BoHDy', surrogate_quantifier=DMy(Cls()), hyper_parameters=hdy_hyper)
#
# def uncertainty_aware_quantifier(self, hyperparameters):
# quantifier = DMy(Cls()).set_params(**hyperparameters)
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
#
# def binary_only(self):
# return True
# class BootsKDEy(MulticlassMethodDescriptor):
# def __init__(self):
# super().__init__(name='BoKDEy', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper)
#
# def uncertainty_aware_quantifier(self, hyperparameters):
# quantifier = KDEyML(Cls()).set_params(**hyperparameters)
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
# Bayesian approaches:
# ------------------------------------------------------------
class BayACC(MulticlassMethodDescriptor):
def __init__(self):
super().__init__(name='BaACC!', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
classifier = Cls(**hyper2cls(hyperparameters))
return BayesianCC(classifier, temperature=None, mcmc_seed=0) # is actually a Bayesian variant of ACC
class BayEMQ(MulticlassMethodDescriptor):
def __init__(self):
super().__init__(name='BaEMQ!', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
classifier = Cls(**hyper2cls(hyperparameters))
return BayesianMAPLS(classifier, prior='uniform', temperature=None, exact_train_prev=False)
class BayKDEyGau(MulticlassMethodDescriptor):
def __init__(self):
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
super().__init__(name='BaKDE-Gau-T!', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
return BayesianKDEy(Cls(), kernel='gaussian', temperature=None, mcmc_seed=0, **hyperparameters)
class BayKDEyAit(MulticlassMethodDescriptor):
def __init__(self):
kdey_hyper = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
super().__init__(name='BaKDE-Ait-T!', surrogate_quantifier=KDEyCLR(Cls()), hyper_parameters=kdey_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters)
class BayKDEyAit2(MulticlassMethodDescriptor):
def __init__(self):
kdey_hyper = {'bandwidth': np.linspace(0.05, 2., 10), **cls_hyper}
super().__init__(name='BaKDE-Ait-T!2', surrogate_quantifier=KDEyCLR2(Cls()), hyper_parameters=kdey_hyper)
def uncertainty_aware_quantifier(self, hyperparameters):
return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters)
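A minimal sketch of how one more method would plug into this registry (the bootstrap-ACC variant below is illustrative, mirroring the commented-out BootsACC above; it is not part of this commit):

class BootsACCSketch(MulticlassMethodDescriptor):
    def __init__(self):
        super().__init__(name='BoACC', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper)

    def uncertainty_aware_quantifier(self, hyperparameters):
        # bootstrap the point quantifier refit with the optimized hyperparameters, as in BootsCC
        return AggregativeBootstrap(ACC(Cls()).set_params(**hyperparameters), n_test_samples=1000, random_state=0)

Adding yield BootsACCSketch() to get_experimental_methods() would then schedule it for all multiclass datasets.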

View File

@@ -10,6 +10,8 @@ from sklearn.preprocessing import MinMaxScaler
from BayesianKDEy.commons import antagonistic_prevalence, in_simplex
from method.confidence import (ConfidenceIntervals as CI,
ConfidenceIntervalsCLR as CICLR,
ConfidenceIntervalsILR as CIILR,
ConfidenceEllipseSimplex as CE,
ConfidenceEllipseCLR as CLR,
ConfidenceEllipseILR as ILR)
@@ -260,6 +262,8 @@ def plot_regions(ax, region_layers, resolution, confine):
)
def plot_points(ax, point_layers):
for layer in point_layers:
pts = layer["points"]
@@ -433,25 +437,34 @@ def plot_kernels():
if __name__ == '__main__':
np.random.seed(1)
- # n = 1000
+ n = 1000
- # alpha = [1,1,1]
+ alpha = [15,10,7]
- # prevs = np.random.dirichlet(alpha, size=n)
+ prevs = np.random.dirichlet(alpha, size=n)
- # def regions():
+ def regions():
- # confs = [0.99, 0.95, 0.90]
+ confs = [0.9, 0.95, 0.99]
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CI-CLR', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-CLR-b', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CI-ILR', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c).coverage) for c in confs]
yield 'CI-ILR-b', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
- # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
+ # yield 'CE-CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
- # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
+ # yield 'CE-ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
- # resolution = 1000
+ resolution = 1000
- # alpha_str = ','.join([f'{str(i)}' for i in alpha])
+ alpha_str = ','.join([f'{str(i)}' for i in alpha])
# for crname, cr in regions():
- # plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
+ dot_style = {"color": "gray", "alpha": .5, "s": 15, 'linewidth': .25, 'edgecolors': "black"}
- # color='blue',
+ point_layer = [
- # save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
+ {"points": prevs, "label": "points", "style": dot_style},
- # )
+ ]
for crname, cr in regions():
region = [{'fn': fn, 'alpha':.6, 'label':label} for label, fn in cr]
plot_simplex(point_layers=point_layer, region_layers=region, show_legend=False, resolution=resolution, save_path=f'./plots/regions/{crname}.png')
# def regions():
# confs = [0.99, 0.95, 0.90]
@@ -544,4 +557,4 @@ if __name__ == '__main__':
# save_path=f'./plots/prior_test/concentration_{c}.png'
# )
- plot_kernels()
+ # plot_kernels()

View File

@@ -13,12 +13,14 @@ def temp_calibration(method:WithConfidenceABC,
val_prot:AbstractProtocol,
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.],
nominal_coverage=0.95,
- amplitude_threshold='auto',
+ amplitude_threshold=1.,
criterion='winkler',
n_jobs=1,
verbose=True):
- assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \
+ assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold <= 1.), \
- f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.'
+ f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float <= 1.0.'
assert criterion in {'auto', 'winkler'}, f'unknown {criterion=}; valid ones are auto or winkler'
if amplitude_threshold=='auto':
n_classes = train.n_classes
@@ -27,15 +29,16 @@ def temp_calibration(method:WithConfidenceABC,
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
- def evaluate_temperature_job(job_id, temp):
+ def _evaluate_temperature_job(job_id, temp):
- if verbose:
+ # if verbose:
- print(f'\tstarting exploration with temperature={temp}...')
+ # print(f'\tstarting exploration with temperature={temp}...')
local_method = copy.deepcopy(method)
local_method.temperature = temp
coverage = 0
amplitudes = []
winklers = []
# errs = []
pbar = tqdm(enumerate(val_prot()), position=job_id, total=val_prot.total(), disable=not verbose)
@@ -47,35 +50,62 @@ def temp_calibration(method:WithConfidenceABC,
coverage += 1
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
winkler = None
if criterion=='winkler':
winkler = conf_region.mean_winkler_score(true_prev=prev, alpha=0.005)
winklers.append(winkler)
# errs.append(qp.error.mae(prev, point_estim))
- pbar.set_description(f'job={job_id} T={temp}: coverage={coverage/(i+1)*100:.2f}% amplitude={np.mean(amplitudes)*100:.2f}%')
+ pbar.set_description(
f'job={job_id} T={temp}: '
f'coverage={coverage/(i+1)*100:.2f}% '
f'amplitude={np.mean(amplitudes)*100:.4f}% '
+ f'winkler={np.mean(winklers):.4f}%' if criterion=='winkler' else ''
)
mean_coverage = coverage / val_prot.total() mean_coverage = coverage / val_prot.total()
mean_amplitude = np.mean(amplitudes) mean_amplitude = np.mean(amplitudes)
winkler_mean = np.mean(winklers) if criterion=='winkler' else None
if verbose: # if verbose:
print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%') # print(
# f'Temperature={temp} got '
# f'coverage={mean_coverage*100:.2f}% '
# f'amplitude={mean_amplitude*100:.2f}% '
# + f'winkler={winkler_mean:.4f}' if criterion == 'winkler' else ''
# )
return temp, mean_coverage, mean_amplitude return temp, mean_coverage, mean_amplitude, winkler_mean
temp_grid = sorted(temp_grid) temp_grid = sorted(temp_grid)
method.fit(*train.Xy) method.fit(*train.Xy)
raw_results = Parallel(n_jobs=n_jobs, backend="loky")( raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
delayed(evaluate_temperature_job)(job_id, temp) delayed(_evaluate_temperature_job)(job_id, temp)
for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose) for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose)
) )
results = [ results = [
(temp, cov, amp) (temp, cov, amp, wink)
for temp, cov, amp in raw_results for temp, cov, amp, wink in raw_results
if amp < amplitude_threshold if amp < amplitude_threshold
] ]
chosen_temperature = 1. chosen_temperature = 1.
if len(results) > 0: if len(results) > 0:
chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0] if criterion=='winkler':
# choose min winkler
chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: x[3])
else:
# choose best coverage (regardless of amplitude), i.e., closest to nominal
chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: abs(x[1]-nominal_coverage))
print(f'chosen_temperature={chosen_temperature:.2f}') if verbose:
print(
f'\nChosen_temperature={chosen_temperature:.2f} got '
f'coverage={ccov*100:.2f}% '
f'amplitude={camp*100:.4f}% '
+ f'winkler={cwink:.4f}' if criterion=='winkler' else ''
)
return chosen_temperature return chosen_temperature
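For orientation, a minimal usage sketch of the routine above (assuming `quantifier` is a WithConfidenceABC method exposing a `temperature` attribute, `train` a LabelledCollection, and `val_prot` an AbstractProtocol; all names are illustrative):

    # calibrate T on validation samples, then refit with the chosen value
    best_T = temp_calibration(
        method=quantifier,
        train=train,
        val_prot=val_prot,
        temp_grid=[.5, 1., 2., 5.],
        nominal_coverage=0.95,
        criterion='winkler',  # select the temperature minimizing the mean Winkler score
        n_jobs=4,
    )
    quantifier.temperature = best_T
    quantifier.fit(*train.Xy)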


@@ -1,15 +1,21 @@
 Change Log 0.2.1
 -----------------
+- Added mechanisms for temperature calibration for coverage
+- Added MAPLS and BayesianEMQ
 - Added DirichletProtocol, which allows generating samples according to a parameterized Dirichlet prior.
-- Added squared ratio error.
 - Improved efficiency of confidence regions coverage functions
 - Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy)
+- Added Temperature parameter to BayesianCC
 - Improved documentation of confidence regions.
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
-- Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
+- Added new error metrics:
+  - dist_aitchison and mean_dist_aitchison
+  - squared ratio error
 - Improved numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where
   densities for small bandwidths may become huge.
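For reference, dist_aitchison presumably implements the standard Aitchison distance from compositional data analysis (an assumption based on the name), which measures distances between prevalence vectors in centered log-ratio (CLR) space:

\[
d_A(p, q) = \lVert \mathrm{clr}(p) - \mathrm{clr}(q) \rVert_2,
\qquad
\mathrm{clr}(p)_i = \log p_i - \frac{1}{K}\sum_{j=1}^{K} \log p_j
\]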
 Change Log 0.2.0
 -----------------


@@ -1,6 +1,11 @@
 Adapt examples; remaining: example 4-onwards
 not working: 15 (qunfold)

+Unify ConfidenceIntervalsTransformation with ConfidenceEllipseTransformation
+Unify functionality of withconfidence methods; the predict_conf has a clearly similar structure across all
+variants, and should be unified in the super class

 Solve the warnings issue; right now there is a warning ignore in method/__init__.py:
 Add 'platt' to calib options in EMQ?

@@ -46,7 +51,15 @@ To remove the labelledcollection from the methods:
 - proportion in [0,1]
 - fit_classifier=False:

+- [TODO] add RLSbench?:
+    - https://arxiv.org/pdf/2302.03020
+    - https://github.com/acmi-lab/RLSbench
+- [TODO] check TableShift
+    - https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76a757ed479a1e6a5f8134bea492f83-Abstract-Datasets_and_Benchmarks.html
+- [TODO] have a look at TorchDrift
+    - https://torchdrift.org/
+- [TODO] have a look at alibi-detect
+    - https://github.com/SeldonIO/alibi-detect/
 - [TODO] check if the KDEyML variant with logsumexp is slower than the original one, or check whether we can explore
   an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxq
 - [TODO] why not simplify the epsilon of RAE? in the end, it is meant to smooth the denominator to avoid div 0


@@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)"
 P_C_COND_Y: str = "P(C|Y)"

-def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None:
+def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, temperature: float, alpha: np.ndarray) -> None:
     """
     Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_.

@@ -50,11 +50,40 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, a
     pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32)))
     p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K)))

+    if temperature==1:
+        # original implementation
+        with numpyro.plate('plate', L):
+            numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled)
+        p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_))
+        numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
+    else:
+        # with temperature modification
+        with numpyro.plate('plate_y', L):
+            logp_F = dist.Multinomial(
+                n_y_labeled,
+                p_c_cond_y
+            ).log_prob(n_y_and_c_labeled)
+            numpyro.factor(
+                'F_yc_loglik',
+                jnp.sum(logp_F) / temperature
+            )
+        p_c = numpyro.deterministic(
+            P_TEST_C,
+            jnp.einsum("yc,y->c", p_c_cond_y, pi_)
+        )
+        # log-likelihood of the unlabeled data
+        logp_N = dist.Multinomial(
+            jnp.sum(n_c_unlabeled),
+            p_c
+        ).log_prob(n_c_unlabeled)
+        numpyro.factor(
+            'N_c_loglik',
+            logp_N / temperature
+        )
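The numpyro.factor terms above implement a tempered (power) likelihood: dividing each log-likelihood by the temperature T corresponds to sampling from

\[
p_T(\theta \mid D) \;\propto\; p(\theta)\, p(D \mid \theta)^{1/T},
\]

so T=1 recovers the ordinary posterior, T>1 flattens the likelihood (yielding wider credible regions), and 0<T<1 sharpens it.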
 def sample_posterior_bayesianCC(
@@ -63,6 +92,7 @@ def sample_posterior_bayesianCC(
         num_warmup: int,
         num_samples: int,
         alpha: np.ndarray,
+        temperature = 1.,
         seed: int = 0,
 ) -> dict:
     """
@@ -88,7 +118,7 @@ def sample_posterior_bayesianCC(
         progress_bar=False
     )
     rng_key = jax.random.PRNGKey(seed)
-    mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha)
+    mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, temperature=temperature, alpha=alpha)
     return mcmc.get_samples()


@@ -178,16 +178,23 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         with qp.util.temp_seed(self.random_state):
             epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
-            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
-            #def neg_loglikelihood(prev):
-            #    prev = np.clip(prev, epsilon, 1.0)
-            #    test_mixture_likelihood = prev @ test_densities
-            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
-            #    return -np.sum(test_loglikelihood)
-            def neg_loglikelihood(prev):
-                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
-                return -np.sum(test_loglikelihood)
+            if n_classes>=30:
+                # new version: improves numerical stability with logsumexp, at the cost of optimization efficiency.
+                # needed if the number of classes is large (approx >= 30) because densities tend to grow exponentially
+                test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+                def neg_loglikelihood(prev):
+                    prev = np.clip(prev, epsilon, 1.0)
+                    test_loglikelihood = logsumexp(np.log(prev)[:,None] + test_log_densities, axis=0)
+                    return -np.sum(test_loglikelihood)
+            else:
+                # original implementation
+                test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+                def neg_loglikelihood(prev):
+                    # prev = np.clip(prev, epsilon, 1.0)
+                    test_mixture_likelihood = prev @ test_densities
+                    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                    return -np.sum(test_loglikelihood)
             return F.optim_minimize(neg_loglikelihood, n_classes)
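A brief note on the log-space branch above: it relies on the identity

\[
\log \sum_{y=1}^{K} \pi_y\, p_y(x) \;=\; \operatorname{logsumexp}_y\big(\log \pi_y + \log p_y(x)\big),
\]

which evaluates the mixture log-likelihood without ever materializing the raw densities p_y(x); for small bandwidths and many classes these can overflow float64.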


@@ -163,6 +163,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
         :param X: array-like of shape `(n_samples, n_features)`, the training instances
         :param y: array-like of shape `(n_samples,)`, the labels
+        :return: a tuple (predictions, labels)
         """
         self._check_classifier(adapt_if_necessary=self.fit_classifier)


@@ -341,6 +341,7 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     """
     def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
         samples = np.asarray(samples)
+        assert samples.ndim == 2, 'unexpected shape; must be (n_bootstrap_samples, n_classes)'
@@ -383,6 +384,10 @@ class ConfidenceIntervals(ConfidenceRegionABC):
         return proportion

+    def coverage_soft(self, true_value):
+        within_intervals = np.logical_and(self.I_low <= true_value, true_value <= self.I_high)
+        return np.mean(within_intervals.astype(float))
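In words, coverage_soft relaxes the all-or-nothing region check to a per-class fraction: for a true prevalence vector p and per-class intervals [l_k, u_k],

\[
\mathrm{cov}_{\mathrm{soft}}(p) \;=\; \frac{1}{K}\sum_{k=1}^{K} \mathbb{1}\big[\, l_k \le p_k \le u_k \,\big].
\]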
     def __repr__(self):
         return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
@@ -390,25 +395,30 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     def n_dim(self):
         return len(self.I_low)

-    def winkler_scores(self, true_prev):
+    def winkler_scores(self, true_prev, alpha=None, add_ae=False):
         true_prev = np.asarray(true_prev)
         assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
         assert len(true_prev)==self.n_dim, \
             f'unexpected number of dimensions; found {len(true_prev)}, expected {self.n_dim}'

-        def winkler_score(low, high, true_val, alpha):
+        def winkler_score(low, high, true_val, alpha, center):
             amp = high-low
-            scale_cost = 1./alpha
+            scale_cost = 2./alpha
             cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
-            return amp + scale_cost*cost
+            err = 0
+            if add_ae:
+                err = abs(true_val - center)
+            return amp + scale_cost*cost + err

+        alpha = alpha or self.alpha
         return np.asarray(
-            [winkler_score(low_i, high_i, true_v, self.alpha)
-             for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
+            [winkler_score(low_i, high_i, true_v, alpha, center)
+             for (low_i, high_i, true_v, center) in zip(self.I_low, self.I_high, true_prev, self.point_estimate())]
         )

-    def mean_winkler_score(self, true_prev):
-        return np.mean(self.winkler_scores(true_prev))
+    def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
+        return np.mean(self.winkler_scores(true_prev, alpha=alpha, add_ae=add_ae))
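With the change from 1/α to 2/α, the inner winkler_score now matches the standard Winkler interval score at miscoverage level α; the optional add_ae term further adds the absolute error with respect to the point estimate:

\[
W_\alpha(l, u; y) \;=\; (u - l) \;+\; \frac{2}{\alpha}\,(l - y)\,\mathbb{1}[y < l] \;+\; \frac{2}{\alpha}\,(y - u)\,\mathbb{1}[y > u].
\]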
 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
@@ -486,8 +496,8 @@ class ConfidenceEllipseTransformed(ConfidenceRegionABC):
         samples = np.asarray(samples)
         self.transformation = transformation
         Z = self.transformation(samples)
-        # self.mean_ = np.mean(samples, axis=0)
-        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.mean_ = np.mean(samples, axis=0)
+        # self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
         self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
         self._samples = samples
         self.alpha = 1.-confidence_level
@@ -549,7 +559,99 @@ class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
class ConfidenceIntervalsTransformed(ConfidenceRegionABC):
"""
Instantiates a Confidence Interval region in a transformed space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
is applied to the significance level (`alpha`) before computing confidence intervals.
The correction consists of replacing `alpha` with `alpha/n_classes`. When
`n_classes=2` the correction is not applied because there is only one verification test
since the other class is constrained. This is not necessarily true for n_classes>2.
"""
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95, bonferroni_correction=False):
samples = np.asarray(samples)
self.transformation = transformation
Z = self.transformation(samples)
self.mean_ = np.mean(samples, axis=0)
# self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
self.conf_region_z = ConfidenceIntervals(Z, confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
self._samples = samples
self.alpha = 1.-confidence_level
@property
def samples(self):
return self._samples
def point_estimate(self):
"""
Returns the point estimate (the mean of the samples in the simplex).
:return: np.ndarray of shape (n_classes,)
"""
# The inverse of the CLR does not coincide with the true mean, because the geometric mean
# requires smoothing the prevalence vectors, and this affects the softmax (inverse);
# return self.transformation.inverse(self.mean_)  # <- does not coincide
return self.mean_
def coverage(self, true_value):
"""
Checks whether a value, or a set of values, is contained in the confidence region. If more than one value is
passed, the method computes the fraction of these that are contained in the region. If only one value is
passed, it returns either 1.0 or 0.0, indicating whether the value is in the region or not.
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
transformed_values = self.transformation(true_value)
return self.conf_region_z.coverage(transformed_values)
def coverage_soft(self, true_value):
transformed_values = self.transformation(true_value)
return self.conf_region_z.coverage_soft(transformed_values)
def winkler_scores(self, true_prev, alpha=None, add_ae=False):
transformed_values = self.transformation(true_prev)
return self.conf_region_z.winkler_scores(transformed_values, alpha=alpha, add_ae=add_ae)
def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
transformed_values = self.transformation(true_prev)
return self.conf_region_z.mean_winkler_score(transformed_values, alpha=alpha, add_ae=add_ae)
class ConfidenceIntervalsCLR(ConfidenceIntervalsTransformed):
"""
Instantiates a Confidence Intervals region in the Centered-Log Ratio (CLR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
is applied to the significance level (`alpha`) before computing confidence intervals.
The correction consists of replacing `alpha` with `alpha/n_classes`. When
`n_classes=2` the correction is not applied because there is only one verification test
since the other class is constrained. This is not necessarily true for n_classes>2.
"""
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
class ConfidenceIntervalsILR(ConfidenceIntervalsTransformed):
"""
Instantiates a Confidence Intervals region in the Isometric-Log Ratio (ILR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
is applied to the significance level (`alpha`) before computing confidence intervals.
The correction consists of replacing `alpha` with `alpha/n_classes`. When
`n_classes=2` the correction is not applied because there is only one verification test
since the other class is constrained. This is not necessarily true for n_classes>2.
"""
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
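A minimal usage sketch of the new interval regions (illustrative only; the Dirichlet draws stand in for bootstrapped prevalence estimates):

    import numpy as np

    rng = np.random.default_rng(0)
    samples = rng.dirichlet(alpha=[20, 30, 50], size=500)  # mock bootstrap estimates, 3 classes

    region = ConfidenceIntervalsCLR(samples, confidence_level=0.95)
    print(region.point_estimate())                     # mean prevalence vector
    print(region.coverage(np.array([0.2, 0.3, 0.5])))  # 1.0 if inside the CLR box, else 0.0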
@@ -611,13 +713,25 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
-        data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
         self.quantifiers = []
         if self.n_train_samples==1:
             self.quantifier.aggregation_fit(classif_predictions, labels)
             self.quantifiers.append(self.quantifier)
         else:
+            if classif_predictions is None or labels is None:
+                # The entire dataset was consumed for classifier training, implying there is no need for training
+                # an aggregation function. If the bootstrap method was configured to train different aggregators
+                # (i.e., self.n_train_samples>1), then an error is raised. Otherwise, the method ends.
+                if self.n_train_samples > 1:
+                    raise ValueError(
+                        f'The underlying quantifier ({self.quantifier.__class__.__name__}) has consumed all the '
+                        f'training data, meaning the aggregation function needs none, but {self.n_train_samples=} '
+                        f'is > 1, which is inconsistent.'
+                    )
+            else:
                 # model-based bootstrap (only on the aggregative part)
+                data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
                 n_examples = len(data)
                 full_index = np.arange(n_examples)
                 with qp.util.temp_seed(self.random_state):
@@ -653,8 +767,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         return prev_estim, conf

     def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
-        if confidence_level is None:
-            confidence_level = self.confidence_level
+        confidence_level = confidence_level or self.confidence_level
         n_samples = classif_predictions.shape[0]
         prevs = []
@@ -665,11 +779,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                     for i in range(self.n_test_samples)
                 )
                 prevs.extend(results)
-        # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
-        #     sample_i = resample(classif_predictions, n_samples=n_samples)
-        #     prev_i = quantifier.aggregate(sample_i)
-        #     prevs.append(prev_i)
-        prevs = np.array(prevs)
         conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
         prev_estim = conf.point_estimate()
@@ -741,6 +852,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
+                 temperature = 1.,
                  prior = 'uniform'):

         if num_warmup <= 0:
@@ -761,6 +873,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
+        self.temperature = temperature
         self.prior = prior

         # Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
@@ -804,6 +917,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
             num_warmup=self.num_warmup,
             num_samples=self.num_samples,
             alpha=alpha,
+            temperature=self.temperature,
             seed=self.mcmc_seed,
         )
         return self._samples
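A sketch of how the new parameter is meant to be used, assuming BayesianCC keeps its usual classifier argument and the predict_conf API of the WithConfidenceABC methods (X, y, X_test are placeholders):

    from sklearn.linear_model import LogisticRegression

    # T>1 tempers (flattens) the likelihood, widening the credible region
    quantifier = BayesianCC(classifier=LogisticRegression(), temperature=2., region='intervals')
    quantifier.fit(X, y)
    prev_estim, conf_region = quantifier.predict_conf(X_test)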

result_table Submodule

@@ -0,0 +1 @@
+Subproject commit 9d433e3e35b4d111a3914a1e7d3257a8fcf24a9b