From bb5763da76b60b980eacdfa47d7fae0ce53587d3 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 12 Feb 2026 13:14:59 +0100 Subject: [PATCH] Fix broken submodule --- .gitmodules | 3 + BayesianKDEy/TODO.txt | 1 + BayesianKDEy/commons.py | 7 + BayesianKDEy/datasets.py | 4 +- BayesianKDEy/full_experiments.py | 95 +++---- BayesianKDEy/generate_results.py | 359 ++++++++++++++++++------ BayesianKDEy/map_experiments.py | 169 +++++++++++ BayesianKDEy/methods.py | 168 +++++++++++ BayesianKDEy/plot_simplex.py | 45 +-- BayesianKDEy/temperature_calibration.py | 60 +++- CHANGE_LOG.txt | 12 +- TODO.txt | 15 +- quapy/method/_bayesian.py | 42 ++- quapy/method/_kdey.py | 27 +- quapy/method/aggregative.py | 1 + quapy/method/confidence.py | 170 +++++++++-- result_table | 1 + 17 files changed, 969 insertions(+), 210 deletions(-) create mode 100644 .gitmodules create mode 100644 BayesianKDEy/map_experiments.py create mode 100644 BayesianKDEy/methods.py create mode 160000 result_table diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..89cf11c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "result_table"] + path = result_table + url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt index d4d8d58..59e835e 100644 --- a/BayesianKDEy/TODO.txt +++ b/BayesianKDEy/TODO.txt @@ -1,4 +1,5 @@ - Things to try: + - Why not optimize the calibration of the classifier directly, instead of optimizing the classifier as a component of the quantifier? - init chain helps? [seems irrelevant in MAPLS...] - Aitchison kernel is better? - other classifiers? diff --git a/BayesianKDEy/commons.py b/BayesianKDEy/commons.py index 3a2b73e..c33e3fa 100644 --- a/BayesianKDEy/commons.py +++ b/BayesianKDEy/commons.py @@ -101,6 +101,13 @@ class KDEyCLR(KDEyML): random_state=random_state, kernel='aitchison' ) +class KDEyCLR2(KDEyML): + def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None): + super().__init__( + classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth, + random_state=random_state, kernel='aitchison' + ) + class KDEyILR(KDEyML): def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None): diff --git a/BayesianKDEy/datasets.py b/BayesianKDEy/datasets.py index f4c80f3..0a19dce 100644 --- a/BayesianKDEy/datasets.py +++ b/BayesianKDEy/datasets.py @@ -180,7 +180,7 @@ class VisualDataHandler(DatasetHandler): @classmethod def get_datasets(cls): - datasets = ['cifar10', 'mnist', 'cifar100coarse', 'fashionmnist', 'svhn'] #+ ['cifar100'] + datasets = ['cifar100coarse', 'cifar10', 'mnist', 'fashionmnist', 'svhn'] #+ ['cifar100'] # datasets_feat = [f'{d}-f' for d in datasets] datasets_feat = [f'{d}-l' for d in datasets] return datasets_feat # + datasets @@ -295,7 +295,7 @@ class UCIDatasetHandler(DatasetHandler, ABC): class UCIMulticlassHandler(UCIDatasetHandler): - DATASETS = [d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])] + DATASETS = sorted([d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])]) def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=1000, random_state=0): super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state) diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index a176b59..c643a52 100644 ---
a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression from copy import deepcopy as cp import quapy as qp from BayesianKDEy.commons import KDEyReduce +from BayesianKDEy.methods import get_experimental_methods, MethodDescriptor from _bayeisan_kdey import BayesianKDEy from _bayesian_mapls import BayesianMAPLS from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh @@ -24,7 +25,7 @@ from time import time -def methods(data_handler: DatasetHandler): +def methods___depr(): """ Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor, scope), where: - name: is a str representing the name of the method (e.g., 'BayesianKDEy') @@ -33,26 +34,23 @@ - bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the quantifier's optimized hyperparameters """ - if False: # isinstance(data_handler, VisualDataHandler): - Cls = MockClassifierFromPosteriors - cls_hyper = {} - val_split = data_handler.get_validation().Xy # use this specific collection - pass - else: - Cls = LogisticRegression - cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]} - val_split = 5 # k-fold cross-validation + + Cls = LogisticRegression + cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]} + val_split = 5 # k-fold cross-validation + cc_hyper = cls_hyper acc_hyper = cls_hyper # emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper} hdy_hyper = {'nbins': [3,4,5,8,16,32], **cls_hyper} kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} - + band = {'bandwidth': np.logspace(-3,-1,10)} multiclass_method = 'multiclass' only_binary = 'only_binary' only_multiclass = 'only_multiclass' # surrogate quantifiers + cc = CC(Cls()) acc = ACC(Cls(), val_split=val_split) hdy = DMy(Cls(), val_split=val_split) kde_gau = KDEyML(Cls(), val_split=val_split) @@ -65,22 +63,26 @@ # Bootstrap approaches: # -------------------------------------------------------------------------------------------------------- + # yield 'BootstrapCC', cc, cc_hyper, lambda hyper: AggregativeBootstrap(CC(Cls()), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method - # Bayesian approaches: + # Bayesian approaches: (* = temperature calibration with 'auto' amplitude threshold, choosing coverage closest to nominal; + = temperature calibration without amplitude threshold, using the winkler criterion; ! = same, but with alpha=0.005 for winkler) # -------------------------------------------------------------------------------------------------------- # yield 'BayesianACC', acc,
acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method + # yield 'BayesianACC*', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method + # yield 'BayesianACC+', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method + # yield 'BayesianACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca5', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca5*', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca10', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca10*', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5, @@ -89,19 +91,21 @@ def methods(data_handler: DatasetHandler): # engine='numpyro', # **hyper), multiclass_method # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: 
BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method + # yield f'BaKDE-Ait-T!', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-T!', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method + # yield 'BayEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method + # yield 'BaEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method + + # yield 'BaACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), temperature=None, mcmc_seed=0), multiclass_method + # yield 'BaEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=None, exact_train_prev=False), multiclass_method def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict): with qp.util.temp_seed(0): - if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid: - def scale_bandwidth(bandwidth, n_classes, beta=0.5): - return bandwidth * np.power(n_classes, beta) - n = dataset.get_training().n_classes - grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']] - print('bandwidth scaled') + point_quantifier = cp(point_quantifier) print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}') # model selection if len(grid)>0: @@ -127,30 +131,26 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier): if uncertainty_quantifier.temperature is None: print('calibrating temperature') train, val_prot = dataset.get_train_valprot_for_modsel() - if dataset.name.startswith('LeQua'): - temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000] - else: - temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.] - temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999) + temp_grid=[1., .5, 1.5, 2., 5., 10., 100., 1000.] 
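The 'winkler' criterion passed to temp_calibration just below scores a confidence interval by its width plus a penalty, scaled by 2/alpha, for the amount by which the true value falls outside it (lower is better); the repository's own implementation is winkler_scores in quapy/method/confidence.py, further down in this patch. A minimal self-contained sketch, with illustrative names only:

import numpy as np

def interval_score(low, high, true_val, alpha=0.05):
    # interval (Winkler) score: width plus 2/alpha times the miss distance
    amplitude = high - low
    miss = max(0.0, low - true_val) + max(0.0, true_val - high)
    return amplitude + (2.0 / alpha) * miss

# a 95% interval [0.10, 0.30] that misses a true prevalence of 0.35 scores
# 0.20 + (2/0.05)*0.05 = 2.20; any interval covering the value scores its width only.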
+ temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=1., criterion='winkler') uncertainty_quantifier.temperature = temperature else: temperature = uncertainty_quantifier.temperature return temperature - -def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path): +def experiment(dataset: DatasetHandler, method: MethodDescriptor, hyper_choice_path: Path): with qp.util.temp_seed(0): # model selection best_hyperparams = qp.util.pickled_resource( - hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid + hyper_choice_path, model_selection, dataset, method.surrogate_quantifier(), method.hyper_parameters ) print(f'{best_hyperparams=}') t_init = time() - uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams) + uncertainty_quantifier = method.uncertainty_aware_quantifier(best_hyperparams) temperature = temperature_calibration(dataset, uncertainty_quantifier) training, test_generator = dataset.get_train_testprot_for_eval() uncertainty_quantifier.fit(*training.Xy) @@ -176,7 +176,13 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, results['test-time'].append(ttime) results['samples'].append(region.samples) - pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}') + pbar.set_description( + f'{method.name} ' + f'MAE={np.mean(results["ae"]):.5f} ' + f'W={np.mean(results["sre"]):.5f} ' + f'Cov={np.mean(results["coverage"]):.5f} ' + f'AMP={np.mean(results["amplitude"]):.5f}' + ) report = { 'optim_hyper': best_hyperparams, @@ -189,40 +195,31 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, return report -def check_skip_experiment(method_scope, dataset: DatasetHandler): - if method_scope == 'only_binary' and not dataset.is_binary(): - return True - if method_scope == 'only_multiclass' and dataset.is_binary(): - return True - return False - - if __name__ == '__main__': result_dir = RESULT_DIR - for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]: + for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: for dataset in data_handler.iter(): qp.environ['SAMPLE_SIZE'] = dataset.sample_size print(f'dataset={dataset.name}') - #if dataset.name != 'abalone': - # continue problem_type = 'binary' if dataset.is_binary() else 'multiclass' - for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset): - if check_skip_experiment(method_scope, dataset): + for method in get_experimental_methods(): + # skip combination? 
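Both the model-selection step and the final report in experiment() above are cached via quapy's qp.util.pickled_resource: on the first call it runs the generating function and pickles the result at the given path; subsequent calls load the pickle and skip the computation, which is what makes re-running these scripts cheap. A usage sketch (assuming the helper keeps its signature of path first, then the function and its positional arguments):

import quapy as qp

def expensive_step(n):
    # stands in for model_selection(...) or experiment(...) in this script
    return sum(i * i for i in range(n))

# first call computes and stores the pickle; later calls just load it
result = qp.util.pickled_resource('./cache/step.pkl', expensive_step, 10_000)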
+ if method.binary_only() and not dataset.is_binary(): continue - result_path = experiment_path(result_dir / problem_type, dataset.name, method_name) - hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_quant.__class__.__name__) + result_path = experiment_path(result_dir / problem_type, dataset.name, method.name) + hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method.surrogate_quantifier_name()) report = qp.util.pickled_resource( - result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path + result_path, experiment, dataset, method, hyper_path ) print(f'dataset={dataset.name}, ' - f'method={method_name}: ' + f'method={method.name}: ' f'mae={report["results"]["ae"].mean():.5f}, ' f'W={report["results"]["sre"].mean():.5f}, ' f'coverage={report["results"]["coverage"].mean():.5f}, ' diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py index cb1932f..5db5c37 100644 --- a/BayesianKDEy/generate_results.py +++ b/BayesianKDEy/generate_results.py @@ -1,6 +1,7 @@ import pickle from collections import defaultdict - +import matplotlib.pyplot as plt +import seaborn as sns from joblib import Parallel, delayed from tqdm import tqdm import pandas as pd @@ -9,10 +10,16 @@ from pathlib import Path import quapy as qp from BayesianKDEy.commons import RESULT_DIR from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler +from comparison_group import SelectGreaterThan, SelectByName, SelectSmallerThan +from format import FormatModifierSelectColor from quapy.error import dist_aitchison -from quapy.method.confidence import ConfidenceIntervals +from quapy.method.confidence import ConfidenceIntervals, ConfidenceIntervalsILR, ConfidenceIntervalsCLR from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC import quapy.functional as F +from result_path.src.table import LatexTable +import numpy as np +import pandas as pd +from itertools import chain pd.set_option('display.max_columns', None) pd.set_option('display.width', 2000) @@ -23,19 +30,16 @@ pd.set_option("display.float_format", "{:.4f}".format) # methods = None # show all methods -methods = ['BayesianACC', - 'BaKDE-Ait-numpyro', - 'BaKDE-Ait-T*', - 'BaKDE-Gau-numpyro', - 'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*', - # 'BayEMQ-U-Temp1-2', - # 'BayEMQ-T*', - 'BayEMQ', - 'BayEMQ*', - # 'BootstrapACC', - # 'BootstrapHDy', - # 'BootstrapKDEy', - # 'BootstrapEMQ' +methods = ['BoCC', + 'BaACC!', + 'BaEMQ!', + 'BaKDE-Gau-T!', + 'BaKDE-Ait-T!', + 'BaKDE-Ait-T!2' + #'BootstrapACC', + #'BootstrapHDy', + #'BootstrapKDEy', + #'BootstrapEMQ' ] def region_score(true_prev, region: ConfidenceRegionABC): @@ -55,11 +59,15 @@ def compute_coverage_amplitude(region_constructor, **kwargs): def process_one(samples, true_prevs): region = region_constructor(samples, **kwargs) - if isinstance(region, ConfidenceIntervals): + if isinstance(region, ConfidenceIntervals) or isinstance(region, ConfidenceIntervalsCLR) or isinstance(region, ConfidenceIntervalsILR): winkler = region.mean_winkler_score(true_prevs) + # winkler_e = region.mean_winkler_score(true_prevs, add_ae=True) + cov_soft = region.coverage_soft(true_prevs) else: winkler = None - return region.coverage(true_prevs), region.montecarlo_proportion(), winkler + # winkler_e = None + cov_soft = 
None + return region.coverage(true_prevs), region.montecarlo_proportion(), winkler, cov_soft out = Parallel(n_jobs=3)( delayed(process_one)(samples, true_prevs) @@ -71,8 +79,8 @@ def compute_coverage_amplitude(region_constructor, **kwargs): ) # unzip results - coverage, amplitude, winkler = zip(*out) - return list(coverage), list(amplitude), list(winkler) + coverage, amplitude, winkler, cov_soft = zip(*out) + return list(coverage), list(amplitude), list(winkler), list(cov_soft) def update_pickle(report, pickle_path, updated_dict:dict): @@ -83,35 +91,151 @@ def update_pickle(report, pickle_path, updated_dict:dict): def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs): if f'coverage-{conf_name}' not in report: - covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs) - - # amperr (lower is better) counts the amplitude when the true vale was covered, or 1 (max amplitude) otherwise - amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)] + covs, amps, winkler, cov_soft = compute_coverage_amplitude(conf_region_class, **kwargs) update_fields = { f'coverage-{conf_name}': covs, f'amplitude-{conf_name}': amps, f'winkler-{conf_name}': winkler, - f'amperr-{conf_name}': amperrs, + f'coverage-soft-{conf_name}': cov_soft } update_pickle(report, file, update_fields) +def pareto_front(df, x_col, y_col, maximize_y=True, minimize_x=True): + """ + Returns a boolean mask indicating whether each row is Pareto-optimal. + """ + X = df[x_col].values + Y = df[y_col].values -def nicer(name:str): + is_pareto = np.ones(len(df), dtype=bool) + + for i in range(len(df)): + if not is_pareto[i]: + continue + for j in range(len(df)): + if i == j: + continue + + better_or_equal_x = X[j] <= X[i] if minimize_x else X[j] >= X[i] + better_or_equal_y = Y[j] >= Y[i] if maximize_y else Y[j] <= Y[i] + + strictly_better = ( + (X[j] < X[i] if minimize_x else X[j] > X[i]) or + (Y[j] > Y[i] if maximize_y else Y[j] < Y[i]) + ) + + if better_or_equal_x and better_or_equal_y and strictly_better: + is_pareto[i] = False + break + + return is_pareto + +def plot_coverage_vs_amplitude( + df, + coverage_col, + amplitude_col="a-CI", + method_col="method", + dataset_col=None, + error_col=None, + error_threshold=None, + nominal_coverage=0.95, + title=None, +): + df_plot = df.copy() + + # Optional error filtering + if error_col is not None and error_threshold is not None: + df_plot = df_plot[df_plot[error_col] <= error_threshold] + + # Compute Pareto front + pareto_mask = pareto_front( + df_plot, + x_col=amplitude_col, + y_col=coverage_col, + maximize_y=True, + minimize_x=True + ) + + plt.figure(figsize=(7, 6)) + + # Base scatter + sns.scatterplot( + data=df_plot, + x=amplitude_col, + y=coverage_col, + hue=method_col, + # style=dataset_col, + alpha=0.6, + s=60, + legend=True + ) + + # Highlight Pareto front + plt.scatter( + df_plot.loc[pareto_mask, amplitude_col], + df_plot.loc[pareto_mask, coverage_col], + facecolors='none', + edgecolors='black', + s=120, + linewidths=1.5, + label="Pareto front" + ) + + # Nominal coverage line + plt.axhline( + nominal_coverage, + linestyle="--", + color="gray", + linewidth=1, + label="Nominal coverage" + ) + + plt.xlabel("Amplitude (fraction of simplex)") + plt.ylabel("Coverage") + plt.ylim(0, 1.05) + + if title is not None: + plt.title(title) + + plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left") + plt.tight_layout() + plt.show() + + +def nicer_method(name:str): replacements = { - 'Bayesian': 'Ba', + # 'Bayesian': 'Ba', 'Bootstrap': 'Bo', 
'-numpyro': '', 'emcee': 'emc', - '-T*': '*' + '-T*': '*', + '-T!': '', + '!': '', + '-Ait': r'$^{(\mathrm{Ait})}$', + '-Gau': r'$^{(\mathrm{Gau})}$' } for k, v in replacements.items(): name = name.replace(k,v) return name +def nicer_data(name:str): + replacements = { + 'cifar': 'CIFAR', + '-l': '', + 'mnist': 'MNIST', + 'fashionmnist': 'fashionMNIST', + 'svhn': 'SVHN', + '100coarse': '100(20)', + } + for k, v in replacements.items(): + name = name.replace(k, v) + return name + + base_dir = RESULT_DIR table = defaultdict(list) @@ -119,62 +243,56 @@ n_classes = {} tr_size = {} tr_prev = {} -for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: - problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass' - path = f'./{base_dir}/{problem_type}/*.pkl' - - for file in tqdm(glob(path), desc='processing results', total=len(glob(path))): - file = Path(file) - dataset, method = file.name.replace('.pkl', '').split('__') - if (method not in methods) or (dataset not in dataset_handler.get_datasets()): - continue - - report = pickle.load(open(file, 'rb')) - results = report['results'] - n_samples = len(results['ae']) - table['method'].extend([nicer(method)] * n_samples) - table['dataset'].extend([dataset] * n_samples) - table['ae'].extend(results['ae']) - table['rae'].extend(results['rae']) - # table['c-CI'].extend(results['coverage']) - # table['a-CI'].extend(results['amplitude']) - - update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True) - # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex) - # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR) - # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR) - - table['c-CI'].extend(report['coverage-CI']) - table['a-CI'].extend(report['amplitude-CI']) - table['w-CI'].extend(report['winkler-CI']) - table['amperr-CI'].extend(report['amperr-CI']) - - # table['c-CE'].extend(report['coverage-CE']) - # table['a-CE'].extend(report['amplitude-CE']) - # table['amperr-CE'].extend(report['amperr-CE']) - - # table['c-CLR'].extend(report['coverage-CLR']) - # table['a-CLR'].extend(report['amplitude-CLR']) - # table['amperr-CLR'].extend(report['amperr-CLR']) - - # table['c-ILR'].extend(report['coverage-ILR']) - # table['a-ILR'].extend(report['amplitude-ILR']) - # table['amperr-ILR'].extend(report['amperr-ILR']) - - table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) - table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001)) - # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']])) - # table['aitch'].extend() - # table['reg-score-ILR'].extend( - # [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])] - # ) - - for dataset in dataset_handler.iter(): +dataset_class = [UCIMulticlassHandler, CIFAR100Handler, VisualDataHandler, LeQuaHandler] +dataset_order = [] +for handler in dataset_class: + for dataset in handler.iter(): + dataset_order.append(dataset.name) train = dataset.get_training() n_classes[dataset.name] = train.n_classes tr_size[dataset.name] = len(train) tr_prev[dataset.name] = F.strprev(train.prevalence()) + +problem_type = 'multiclass' +path 
= f'./{base_dir}/{problem_type}/*.pkl' + +for file in tqdm(glob(path), desc='processing results', total=len(glob(path))): + file = Path(file) + dataset, method = file.name.replace('.pkl', '').split('__') + if (method not in methods) or (dataset not in dataset_order): + continue + + report = pickle.load(open(file, 'rb')) + results = report['results'] + n_samples = len(results['ae']) + table['method'].extend([nicer_method(method)] * n_samples) + table['dataset'].extend([nicer_data(dataset)] * n_samples) + table['ae'].extend(results['ae']) + table['rae'].extend(results['rae']) + # table['c-CI'].extend(results['coverage']) + # table['a-CI'].extend(results['amplitude']) + + # update_pickle_with_region(report, file, conf_name='CI-ILR', conf_region_class=ConfidenceIntervalsILR, bonferroni_correction=True) + # update_pickle_with_region(report, file, conf_name='CI-CLR', conf_region_class=ConfidenceIntervalsCLR, bonferroni_correction=True) + update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True) + update_pickle_with_region(report, file, conf_name='CInb', conf_region_class=ConfidenceIntervals, bonferroni_correction=False) # no Bonferroni-correction + # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex) + # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR) + # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR) + + conf_bonferroni = 'CI' + conf_name='CInb' + table['c-CI'].extend(report[f'coverage-{conf_bonferroni}']) # the true coverage is better measured with Bonferroni-correction + table['w-CI'].extend(report[f'winkler-{conf_name}']) + table['cs-CI'].extend(report[f'coverage-soft-{conf_name}']) + table['a-CI'].extend(report[f'amplitude-{conf_name}']) + + # table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) # not in this paper... 
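Reading coverage from the Bonferroni-corrected intervals while taking width-based scores from the uncorrected ones is deliberate: K per-class intervals, each at level 1-alpha, do not jointly cover the true prevalence vector with probability 1-alpha, whereas replacing alpha by alpha/K restores a simultaneous guarantee at the cost of wider intervals. A sketch of a plausible percentile construction under this correction (illustrative names; the repository's version is ConfidenceIntervals in quapy/method/confidence.py):

import numpy as np

def percentile_intervals(samples, alpha=0.05, bonferroni=True):
    # samples: (n_bootstrap_samples, n_classes) array of prevalence vectors
    K = samples.shape[1]
    # with K=2 one class determines the other, so no correction is needed
    a = alpha / K if (bonferroni and K > 2) else alpha
    low = np.percentile(samples, 100 * (a / 2), axis=0)
    high = np.percentile(samples, 100 * (1 - a / 2), axis=0)
    return low, high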
+ table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001)) + + + # remove datasets with more than max_classes classes # max_classes = 25 # min_train = 500 @@ -190,19 +308,100 @@ for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, C # df = df[df["dataset"] != data_name] + + df = pd.DataFrame(table) +df['a-CI'] *= 100 +df['c-CI'] *= 100 +df['cs-CI'] *= 100 + for region in ['CI']: #, 'CLR', 'ILR', 'CI']: if problem_type == 'binary' and region=='ILR': continue - for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']: + for column in [f'a-{region}', 'ae', 'SRE', f'c-{region}', f'cs-{region}']: # f'w-{region}' pv = pd.pivot_table( df, index='dataset', columns='method', values=column, margins=True ) pv['n_classes'] = pv.index.map(n_classes).astype('Int64') pv['tr_size'] = pv.index.map(tr_size).astype('Int64') #pv['tr-prev'] = pv.index.map(tr_prev) - pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"]) + pv = pv.drop(columns=[col for col in pv.columns if col == "All" or col[-1]=='All']) print(f'{problem_type=} {column=}') print(pv) print('-'*80) + latex = LatexTable.from_dataframe(df, method='method', benchmark='dataset', value=column, name=column) + latex.format.configuration.show_std = False + #latex.reorder_methods([nicer_method(m) for m in methods]) + latex.reorder_benchmarks([nicer_data(d) for d in dataset_order]) + if column in ['ae', 'SRE']: + latex.format.configuration.lower_is_better = True + latex.format.configuration.stat_test = 'wilcoxon' + #latex.format.configuration.stat_test = None + # latex.format.configuration.show_std = True + + if column in [f'c-{region}', f'cs-{region}']: + latex.format.configuration.lower_is_better = False + latex.format.configuration.stat_test = None + latex.format.configuration.with_color = False + latex.format.configuration.best_in_bold = False + latex.format.configuration.with_rank = False + latex.format.configuration.mean_prec = 0 + latex.add_format_modifier( + format_modifier=FormatModifierSelectColor( + comparison=SelectGreaterThan(reference_selector=89, input_selector=SelectByName()) + ) + ) + if column in [f'a-{region}']: + latex.format.configuration.lower_is_better = True + latex.format.configuration.stat_test = None + latex.format.configuration.with_color = False + latex.format.configuration.best_in_bold = False + latex.format.configuration.mean_prec = 2 + latex.add_format_modifier( + format_modifier=FormatModifierSelectColor( + comparison=SelectSmallerThan(reference_selector=11, input_selector=SelectByName()) + ) + ) + # latex.add_format_modifier( + # format_modifier=FormatModifierSelectColor( + # comparison=SelectSmallerThan(reference_selector=0.01, input_selector=SelectByName()), + # intensity=50 + # ) + # ) + + latex.format.configuration.resizebox=.5 + latex.latexPDF(pdf_path=f'./tables/{latex.name}.pdf') + + +df = df[df['method']!='BaACC'] +df = df[df['method']!='BaACC*'] +df = df[df['method']!='BaACC+'] +df = df[df['method']!='BaKDE-Ait*'] +df = df[df['method']!='BaKDE-Gau*'] +df = df[df['method']!='BayEMQ*'] +grouped = df.groupby(["method", "dataset"]) +agg = grouped.agg( + ae_mean=("ae", "mean"), + ae_std=("ae", "std"), + sre_mean=("SRE", "mean"), + sre_std=("SRE", "std"), + coverage_mean=("c-CI", "mean"), + coverage_std=("c-CI", "std"), + coverage_soft_mean=("cs-CI", "mean"), + amplitude_mean=("a-CI", "mean"), + amplitude_std=("a-CI", "std"), +).reset_index() + +#plot_coverage_vs_amplitude( +# agg, +# coverage_col="coverage_soft_mean", +# 
amplitude_col="amplitude_mean", +# method_col="method", +# dataset_col="dataset", +# nominal_coverage=0.95, +# title="Marginal coverage vs amplitude" +#) + + +#print('RESTITUIR EL WILCOXON') diff --git a/BayesianKDEy/map_experiments.py b/BayesianKDEy/map_experiments.py new file mode 100644 index 0000000..65e34b8 --- /dev/null +++ b/BayesianKDEy/map_experiments.py @@ -0,0 +1,169 @@ +import os.path +from pathlib import Path +import pandas as pd +from sklearn.linear_model import LogisticRegression +from copy import deepcopy as cp +import quapy as qp +from BayesianKDEy.commons import KDEyReduce +from _bayeisan_kdey import BayesianKDEy +from _bayesian_mapls import BayesianMAPLS +from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh +# import datasets +from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler +from method.confidence import ConfidenceIntervals +from temperature_calibration import temp_calibration +from build.lib.quapy.data import LabelledCollection +from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC +from quapy.model_selection import GridSearchQ +from quapy.data import Dataset +from quapy.method.confidence import BayesianCC, AggregativeBootstrap +from quapy.method.aggregative import KDEyML, ACC +from quapy.protocol import UPP +import numpy as np +from tqdm import tqdm +from collections import defaultdict +from time import time + + +def methods(data_handler: DatasetHandler): + """ + Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where: + - name: is a str representing the name of the method (e.g., 'BayesianKDEy') + - quantifier: is the base model (e.g., KDEyML()) + - hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}) + - bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the + quantifier with optimized hyperparameters + """ + if isinstance(data_handler, VisualDataHandler): + Cls = LogisticRegression + cls_hyper = {} + else: + Cls = LogisticRegression + cls_hyper = {'classifier__C': np.logspace(-4, 4, 9), 'classifier__class_weight': ['balanced', None]} + kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} + kdey_hyper_larger = {'bandwidth': np.logspace(-1, 0, 10), **cls_hyper} + kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + + # surrogate quantifiers + kde_gau_scale = KDEyScaledB(Cls()) + + yield 'KDEy-G-exp', kdey_hyper, KDEyML(Cls()) + # yield 'KDEy-G-exp2', kdey_hyper_larger, KDEyML(Cls()) + # yield 'KDEy-G-log', kdey_hyper, KDEyML(Cls(), logdensities=True) + yield 'KDEy-Ait', kdey_hyper_clr, KDEyCLR(Cls()) + + +def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict): + with qp.util.temp_seed(0): + if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid: + def scale_bandwidth(bandwidth, n_classes, beta=0.5): + return bandwidth * np.power(n_classes, beta) + + n = dataset.get_training().n_classes + grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']] + print('bandwidth scaled') + print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}') + # model selection + if len(grid) > 0: + train, val_prot = dataset.get_train_valprot_for_modsel() + mod_sel = GridSearchQ( + model=point_quantifier, + param_grid=grid, + protocol=val_prot, + refit=False, + 
n_jobs=-1, + verbose=True + ).fit(*train.Xy) + best_params = mod_sel.best_params_ + else: + best_params = {} + + return best_params + + +def experiment(dataset: DatasetHandler, + point_quantifier: AggregativeQuantifier, + method_name: str, + grid: dict, + hyper_choice_path: Path): + + with qp.util.temp_seed(0): + # model selection + best_hyperparams = qp.util.pickled_resource( + hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid + ) + print(f'{best_hyperparams=}') + + t_init = time() + training, test_generator = dataset.get_train_testprot_for_eval() + point_quantifier.fit(*training.Xy) + tr_time = time() - t_init + + # test + train_prevalence = training.prevalence() + results = defaultdict(list) + pbar = tqdm(enumerate(test_generator()), total=test_generator.total()) + for i, (sample_X, true_prevalence) in pbar: + t_init = time() + point_estimate = point_quantifier.predict(sample_X) + ttime = time() - t_init + + results['true-prevs'].append(true_prevalence) + results['point-estim'].append(point_estimate) + results['shift'].append(qp.error.ae(true_prevalence, train_prevalence)) + results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate)) + results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate)) + results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence)) + results['test-time'].append(ttime) + + pbar.set_description( + f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f}') + + report = { + 'optim_hyper': best_hyperparams, + 'train_time': tr_time, + 'train-prev': train_prevalence, + 'results': {k: np.asarray(v) for k, v in results.items()}, + } + + return report + + +if __name__ == '__main__': + + result_dir = Path('results_map') + + reports = defaultdict(list) + + for data_handler in [UCIMulticlassHandler]: # , UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: + for dataset in data_handler.iter(): + qp.environ['SAMPLE_SIZE'] = dataset.sample_size + # print(f'dataset={dataset.name}') + + problem_type = 'binary' if dataset.is_binary() else 'multiclass' + for method_name, hyper_params, quantifier in methods(dataset): + + result_path = experiment_path(result_dir / problem_type, dataset.name, method_name) + hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method_name) + + # if os.path.exists(result_path): + report = qp.util.pickled_resource( + result_path, experiment, dataset, quantifier, method_name, hyper_params, hyper_path + ) + reports['dataset'].append(dataset.name) + reports['method'].append(method_name) + reports['MAE'].append(report["results"]["ae"].mean()) + reports['SRE'].append(report["results"]["sre"].mean()) + reports['h'].append(report["optim_hyper"]["bandwidth"]) + + print(f'dataset={dataset.name}, ' + f'method={method_name}: ' + f'mae={reports["MAE"][-1]:.5f}, ' + f'W={reports["SRE"][-1]:.5f} ' + f'h={reports["h"][-1]}') + + pv = pd.DataFrame(reports).pivot_table(values=['MAE', 'SRE', 'h'], index='dataset', columns='method', margins=True) + print(pv) + + + diff --git a/BayesianKDEy/methods.py b/BayesianKDEy/methods.py new file mode 100644 index 0000000..55e6625 --- /dev/null +++ b/BayesianKDEy/methods.py @@ -0,0 +1,168 @@ +from abc import ABC, abstractmethod +import numpy as np +from sklearn.linear_model import LogisticRegression + +from BayesianKDEy._bayeisan_kdey import BayesianKDEy +from BayesianKDEy._bayesian_mapls import BayesianMAPLS +from 
BayesianKDEy.commons import KDEyCLR, KDEyCLR2 +from quapy.method.aggregative import CC, ACC, EMQ, DMy, KDEyML +from quapy.method.base import BaseQuantifier +from quapy.method.confidence import AggregativeBootstrap, BayesianCC + + +def get_experimental_methods(): + #yield BootsCC() + #yield BayACC() + #yield BayEMQ() + #yield BayKDEyGau() + #yield BayKDEyAit() + yield BayKDEyAit2() + + +# commons +# ------------------------------------------------------------ +Cls = LogisticRegression +cls_hyper = { + 'classifier__C': np.logspace(-4,4,9), + 'classifier__class_weight': ['balanced', None] +} +hdy_hyper = {'nbins': [3, 4, 5, 8, 9, 10, 12, 14, 16, 32], **cls_hyper} +kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} +kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + +def hyper2cls(hyperparams): + return {k.replace('classifier__', ''): v for k, v in hyperparams.items()} + + +# method descriptor logic +# ------------------------------------------------------------ +class MethodDescriptor(ABC): + def __init__(self, + name: str, + surrogate_quantifier: BaseQuantifier, + hyper_parameters: dict, + ): + self.name = name + self.surrogate_quantifier_ = surrogate_quantifier + self.hyper_parameters = hyper_parameters + + @abstractmethod + def binary_only(self): ... + + def surrogate_quantifier(self): + return self.surrogate_quantifier_ + + def surrogate_quantifier_name(self): + return self.surrogate_quantifier_.__class__.__name__ + + @abstractmethod + def uncertainty_aware_quantifier(self, hyperparameters): ... + + +class MulticlassMethodDescriptor(MethodDescriptor): + + def binary_only(self): + return False + + +# specific methods definitions +# ------------------------------------------------------------ +# ------------------------------------------------------------ + +# Bootstrap approaches: +# ------------------------------------------------------------ +class BootsCC(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BoCC', surrogate_quantifier=CC(Cls()), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + quantifier = CC(Cls()).set_params(**hyperparameters) + return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# class BootsACC(MulticlassMethodDescriptor): +# def __init__(self): +# super().__init__(name='BoACC', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = ACC(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) +# +# +# class BootsEMQ(MulticlassMethodDescriptor): +# def __init__(self): +# super().__init__(name='BoEMQ', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = EMQ(Cls(), exact_train_prev=False).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# class BootsHDy(MethodDescriptor): +# def __init__(self): +# super().__init__(name='BoHDy', surrogate_quantifier=DMy(Cls()), hyper_parameters=hdy_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = DMy(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) +# +# def binary_only(self): +# return True + +# class BootsKDEy(MulticlassMethodDescriptor): +# def __init__(self): +# 
super().__init__(name='BoKDEy', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = KDEyML(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# Bayesian approaches: +# ------------------------------------------------------------ +class BayACC(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BaACC!', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + classifier = Cls(**hyper2cls(hyperparameters)) + return BayesianCC(classifier, temperature=None, mcmc_seed=0) # is actually a Bayesian variant of ACC + + +class BayEMQ(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BaEMQ!', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + classifier = Cls(**hyper2cls(hyperparameters)) + return BayesianMAPLS(classifier, prior='uniform', temperature=None, exact_train_prev=False) + + +class BayKDEyGau(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} + super().__init__(name='BaKDE-Gau-T!', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='gaussian', temperature=None, mcmc_seed=0, **hyperparameters) + + +class BayKDEyAit(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + super().__init__(name='BaKDE-Ait-T!', surrogate_quantifier=KDEyCLR(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters) + + +class BayKDEyAit2(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.linspace(0.05, 2., 10), **cls_hyper} + super().__init__(name='BaKDE-Ait-T!2', surrogate_quantifier=KDEyCLR2(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters) + + + diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py index 2f1a79e..af44a6a 100644 --- a/BayesianKDEy/plot_simplex.py +++ b/BayesianKDEy/plot_simplex.py @@ -10,6 +10,8 @@ from sklearn.preprocessing import MinMaxScaler from BayesianKDEy.commons import antagonistic_prevalence, in_simplex from method.confidence import (ConfidenceIntervals as CI, + ConfidenceIntervalsCLR as CICLR, + ConfidenceIntervalsILR as CIILR, ConfidenceEllipseSimplex as CE, ConfidenceEllipseCLR as CLR, ConfidenceEllipseILR as ILR) @@ -260,6 +262,8 @@ def plot_regions(ax, region_layers, resolution, confine): ) + + def plot_points(ax, point_layers): for layer in point_layers: pts = layer["points"] @@ -433,25 +437,34 @@ def plot_kernels(): if __name__ == '__main__': np.random.seed(1) - # n = 1000 - # alpha = [1,1,1] - # prevs = np.random.dirichlet(alpha, size=n) + n = 1000 + alpha = [15,10,7] + prevs = np.random.dirichlet(alpha, size=n) - # def regions(): - # confs = [0.99, 0.95, 0.90] - # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] + def regions(): + confs = [0.9, 0.95, 0.99] + # yield 'CI', [(f'{int(c*100)}%', CI(prevs, 
confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] + # yield 'CI-CLR', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CI-CLR-b', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] + # yield 'CI-ILR', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c).coverage) for c in confs] + yield 'CI-ILR-b', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] - # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] - # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CE-CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CE-ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] - # resolution = 1000 - # alpha_str = ','.join([f'{str(i)}' for i in alpha]) - # for crname, cr in regions(): - # plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution, - # color='blue', - # save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png', - # ) + resolution = 1000 + alpha_str = ','.join([f'{str(i)}' for i in alpha]) + + dot_style = {"color": "gray", "alpha": .5, "s": 15, 'linewidth': .25, 'edgecolors': "black"} + point_layer = [ + {"points": prevs, "label": "points", "style": dot_style}, + ] + + for crname, cr in regions(): + region = [{'fn': fn, 'alpha':.6, 'label':label} for label, fn in cr] + + plot_simplex(point_layers=point_layer, region_layers=region, show_legend=False, resolution=resolution, save_path=f'./plots/regions/{crname}.png') # def regions(): # confs = [0.99, 0.95, 0.90] @@ -544,4 +557,4 @@ if __name__ == '__main__': # save_path=f'./plots/prior_test/concentration_{c}.png' # ) - plot_kernels() \ No newline at end of file + # plot_kernels() \ No newline at end of file diff --git a/BayesianKDEy/temperature_calibration.py b/BayesianKDEy/temperature_calibration.py index f1b9326..5dc6540 100644 --- a/BayesianKDEy/temperature_calibration.py +++ b/BayesianKDEy/temperature_calibration.py @@ -13,12 +13,14 @@ def temp_calibration(method:WithConfidenceABC, val_prot:AbstractProtocol, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], nominal_coverage=0.95, - amplitude_threshold='auto', + amplitude_threshold=1., + criterion='winkler', n_jobs=1, verbose=True): - assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \ - f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.' + assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold <= 1.), \ + f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float <= 1.0.' 
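The amplitude this threshold bounds is the fraction of the probability simplex occupied by the confidence region, which montecarlo_proportion estimates by sampling prevalence vectors uniformly from the simplex (a symmetric Dirichlet with all concentrations equal to 1) and counting how many fall inside the region. A sketch under those assumptions, with illustrative names:

import numpy as np

def montecarlo_proportion(covers, n_classes, n_trials=50_000, seed=0):
    # covers: callable returning True if a prevalence vector lies in the region
    rng = np.random.default_rng(seed)
    points = rng.dirichlet(np.ones(n_classes), size=n_trials)  # uniform over the simplex
    return np.mean([covers(p) for p in points])

# e.g., for per-class intervals [low, high]:
# covers = lambda p: np.all((low <= p) & (p <= high))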
+ assert criterion in {'auto', 'winkler'}, f'unknown {criterion=}; valid ones are auto or winkler' if amplitude_threshold=='auto': n_classes = train.n_classes @@ -27,15 +29,16 @@ if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1: print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions') - def evaluate_temperature_job(job_id, temp): - if verbose: - print(f'\tstarting exploration with temperature={temp}...') + def _evaluate_temperature_job(job_id, temp): + # if verbose: + # print(f'\tstarting exploration with temperature={temp}...') local_method = copy.deepcopy(method) local_method.temperature = temp coverage = 0 amplitudes = [] + winklers = [] # errs = [] pbar = tqdm(enumerate(val_prot()), position=job_id, total=val_prot.total(), disable=not verbose) @@ -47,35 +50,62 @@ coverage += 1 amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) + winkler = None + if criterion=='winkler': + winkler = conf_region.mean_winkler_score(true_prev=prev, alpha=0.005) + winklers.append(winkler) + # errs.append(qp.error.mae(prev, point_estim)) - pbar.set_description(f'job={job_id} T={temp}: coverage={coverage/(i+1)*100:.2f}% amplitude={np.mean(amplitudes)*100:.2f}%') + pbar.set_description( + f'job={job_id} T={temp}: ' + f'coverage={coverage/(i+1)*100:.2f}% ' + f'amplitude={np.mean(amplitudes)*100:.4f}% ' + + (f'winkler={np.mean(winklers):.4f}' if criterion=='winkler' else '') + ) mean_coverage = coverage / val_prot.total() mean_amplitude = np.mean(amplitudes) + winkler_mean = np.mean(winklers) if criterion=='winkler' else None - if verbose: - print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%') + # if verbose: + # print( + # f'Temperature={temp} got ' + # f'coverage={mean_coverage*100:.2f}% ' + # f'amplitude={mean_amplitude*100:.2f}% ' + # + (f'winkler={winkler_mean:.4f}' if criterion == 'winkler' else '') + # ) - return temp, mean_coverage, mean_amplitude + return temp, mean_coverage, mean_amplitude, winkler_mean temp_grid = sorted(temp_grid) method.fit(*train.Xy) raw_results = Parallel(n_jobs=n_jobs, backend="loky")( - delayed(evaluate_temperature_job)(job_id, temp) + delayed(_evaluate_temperature_job)(job_id, temp) for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose) ) results = [ - (temp, cov, amp) - for temp, cov, amp in raw_results + (temp, cov, amp, wink) + for temp, cov, amp, wink in raw_results if amp < amplitude_threshold ] chosen_temperature = 1.
if len(results) > 0: - chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0] + if criterion=='winkler': + # choose min winkler + chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: x[3]) + else: + # choose best coverage (regardless of amplitude), i.e., closest to nominal + chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: abs(x[1]-nominal_coverage)) - print(f'chosen_temperature={chosen_temperature:.2f}') + if verbose: + print( + f'\nchosen temperature={chosen_temperature:.2f} got ' + f'coverage={ccov*100:.2f}% ' + f'amplitude={camp*100:.4f}% ' + + (f'winkler={cwink:.4f}' if criterion=='winkler' else '') + ) return chosen_temperature diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt index 9c10d20..28d11a4 100644 --- a/CHANGE_LOG.txt +++ b/CHANGE_LOG.txt @@ -1,15 +1,21 @@ Change Log 0.2.1 ----------------- +- Added temperature-calibration mechanisms for coverage +- Added MAPLS and BayesianEMQ - Added DirichletProtocol, which allows generating samples according to a parameterized Dirichlet prior. -- Added squared ratio error. - Improved efficiency of confidence regions coverage functions - Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy) +- Added a temperature parameter to BayesianCC - Improved documentation of confidence regions. - Added ReadMe method by Daniel Hopkins and Gary King - Internal index in LabelledCollection is now "lazy", and is only constructed if required. -- Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric. -- Improved numerical stability of KDEyML through logsumexp; useful for cases with large number of classes, where densities for small bandwidths may become huge +- Added new error metrics: + - dist_aitchison and mean_dist_aitchison as a new error evaluation metric. + - squared ratio error. +- Improved numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where + densities for small bandwidths may become huge. + Change Log 0.2.0 ----------------- diff --git a/TODO.txt b/TODO.txt index 2e1652b..0259e74 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,11 @@ Adapt examples; remaining: example 4-onwards not working: 15 (qunfold) +Unify ConfidenceIntervalsTransformed with ConfidenceEllipseTransformed + +Unify the functionality of the with-confidence methods; predict_conf has a clearly similar structure across all + variants and should be unified in the superclass + Solve the warnings issue; right now there is a warning ignore in method/__init__.py: Add 'platt' to calib options in EMQ? @@ -46,7 +51,15 @@ To remove the labelledcollection from the methods: - proportion in [0,1] - fit_classifier=False: - +- [TODO] add RLSbench?: + - https://arxiv.org/pdf/2302.03020 + - https://github.com/acmi-lab/RLSbench +- [TODO] check Table shift + - https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76a757ed479a1e6a5f8134bea492f83-Abstract-Datasets_and_Benchmarks.html +- [TODO] have a look at TorchDrift + - https://torchdrift.org/ +- [TODO] have a look at + - https://github.com/SeldonIO/alibi-detect/ - [TODO] check if the KDEyML variant with logsumexp is slower than the original one, or check whether we can explore an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxq - [TODO] why not simplify the epsilon of RAE?
after all, it is meant to smooth the denominator to avoid division by zero diff --git a/quapy/method/_bayesian.py index cd0e8c2..b3b49d7 100644 --- a/quapy/method/_bayesian.py +++ b/quapy/method/_bayesian.py @@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)" P_C_COND_Y: str = "P(C|Y)" -def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None: +def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, temperature:float, alpha: np.ndarray) -> None: """ Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_. @@ -50,11 +50,40 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, a pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32))) p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K))) - with numpyro.plate('plate', L): - numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled) + if temperature==1: + # original implementation + with numpyro.plate('plate', L): + numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled) - p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_)) - numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled) + p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_)) + numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled) + else: + # with temperature modification + with numpyro.plate('plate_y', L): + logp_F = dist.Multinomial( + n_y_labeled, + p_c_cond_y + ).log_prob(n_y_and_c_labeled) + + numpyro.factor( + 'F_yc_loglik', + jnp.sum(logp_F) / temperature + ) + p_c = numpyro.deterministic( + P_TEST_C, + jnp.einsum("yc,y->c", p_c_cond_y, pi_) + ) + + # likelihood of the unlabeled data + logp_N = dist.Multinomial( + jnp.sum(n_c_unlabeled), + p_c + ).log_prob(n_c_unlabeled) + + numpyro.factor( + 'N_c_loglik', + logp_N / temperature + ) def sample_posterior_bayesianCC( @@ -63,6 +92,7 @@ num_warmup: int, num_samples: int, alpha: np.ndarray, + temperature = 1., seed: int = 0, ) -> dict: """ @@ -88,7 +118,7 @@ progress_bar=False ) rng_key = jax.random.PRNGKey(seed) - mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha) + mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, temperature=temperature, alpha=alpha) return mcmc.get_samples() diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py index 09df327..95f23ae 100644 --- a/quapy/method/_kdey.py +++ b/quapy/method/_kdey.py @@ -178,17 +178,24 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase): with qp.util.temp_seed(self.random_state): epsilon = 1e-12 n_classes = len(self.mix_densities) - #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities] - test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities] + if n_classes>=30: + # new version: improves numerical stability with logsumexp, at the cost of optimization efficiency.
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 09df327..95f23ae 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -178,17 +178,24 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         with qp.util.temp_seed(self.random_state):
             epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
-            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+            if n_classes>=30:
+                # log-space version: improves numerical stability via logsumexp, at the cost of optimization
+                # efficiency; needed when the number of classes is large (approx. >= 30), because the densities
+                # for small bandwidths may become huge and overflow
+                test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]

-            #def neg_loglikelihood(prev):
-            #    prev = np.clip(prev, epsilon, 1.0)
-            #    test_mixture_likelihood = prev @ test_densities
-            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
-            #    return -np.sum(test_loglikelihood)
-            def neg_loglikelihood(prev):
-                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
-                return -np.sum(test_loglikelihood)
+                def neg_loglikelihood(prev):
+                    prev = np.clip(prev, epsilon, 1.0)
+                    test_loglikelihood = logsumexp(np.log(prev)[:,None] + test_log_densities, axis=0)
+                    return -np.sum(test_loglikelihood)
+            else:
+                # original implementation, operating on densities directly
+                test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+
+                def neg_loglikelihood(prev):
+                    test_mixture_likelihood = prev @ test_densities
+                    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                    return -np.sum(test_loglikelihood)

         return F.optim_minimize(neg_loglikelihood, n_classes)
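Why the log-space branch is needed: with many classes and small bandwidths, per-class KDE log-densities can exceed ~709, at which point `exp` overflows float64 and the naive mixture likelihood becomes infinite, while `logsumexp` stays finite. A self-contained toy demonstration (the magnitudes are synthetic, chosen to mimic that regime):

```python
import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
n_classes, n_instances = 50, 10
# synthetic per-class log-densities of each test instance; values above
# ~709 are exactly where exp() overflows in float64
test_log_densities = rng.uniform(700, 800, size=(n_classes, n_instances))
prev = np.full(n_classes, 1 / n_classes)

# naive version: exponentiate, mix, then take the log -> overflows to inf
with np.errstate(over='ignore'):
    naive = np.log(prev @ np.exp(test_log_densities))

# log-space version: mix directly in log space -> finite and exact
stable = logsumexp(np.log(prev)[:, None] + test_log_densities, axis=0)

print(np.isinf(naive).any(), np.isfinite(stable).all())  # True True
```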
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index d565140..8be9694 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -163,6 +163,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):

         :param X: array-like of shape `(n_samples, n_features)`, the training instances
         :param y: array-like of shape `(n_samples,)`, the labels
+        :return: a tuple (predictions, labels)
         """
         self._check_classifier(adapt_if_necessary=self.fit_classifier)
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 10b2245..d4416c0 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -341,6 +341,7 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     """
     def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'

         samples = np.asarray(samples)
+        assert samples.ndim == 2, 'unexpected shape; must be (n_bootstrap_samples, n_classes)'
@@ -383,6 +384,10 @@ class ConfidenceIntervals(ConfidenceRegionABC):

         return proportion

+    def coverage_soft(self, true_value):
+        # fraction of classes whose true value falls within the corresponding per-class interval
+        within_intervals = np.logical_and(self.I_low <= true_value, true_value <= self.I_high)
+        return np.mean(within_intervals.astype(float))
+
     def __repr__(self):
         return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'

@@ -390,25 +395,30 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     def n_dim(self):
         return len(self.I_low)

-    def winkler_scores(self, true_prev):
+    def winkler_scores(self, true_prev, alpha=None, add_ae=False):
         true_prev = np.asarray(true_prev)
         assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
         assert len(true_prev)==self.n_dim, \
             f'unexpected number of dimensions; found {true_prev.ndim}, expected {self.n_dim}'

-        def winkler_score(low, high, true_val, alpha):
+        def winkler_score(low, high, true_val, alpha, center):
             amp = high-low
-            scale_cost = 1./alpha
+            # the standard interval (Winkler) score penalizes misses at rate 2/alpha
+            scale_cost = 2./alpha
             cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
-            return amp + scale_cost*cost
+            err = 0
+            if add_ae:
+                # optionally add the absolute error w.r.t. the point estimate (center)
+                err = abs(true_val - center)
+            return amp + scale_cost*cost + err

+        alpha = alpha or self.alpha
         return np.asarray(
-            [winkler_score(low_i, high_i, true_v, self.alpha)
-             for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
+            [winkler_score(low_i, high_i, true_v, alpha, center)
+             for (low_i, high_i, true_v, center) in zip(self.I_low, self.I_high, true_prev, self.point_estimate())]
         )

-    def mean_winkler_score(self, true_prev):
-        return np.mean(self.winkler_scores(true_prev))
+    def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
+        return np.mean(self.winkler_scores(true_prev, alpha=alpha, add_ae=add_ae))
+

 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
@@ -486,8 +496,8 @@ class ConfidenceEllipseTransformed(ConfidenceRegionABC):
         samples = np.asarray(samples)
         self.transformation = transformation
         Z = self.transformation(samples)
-        # self.mean_ = np.mean(samples, axis=0)
-        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.mean_ = np.mean(samples, axis=0)
+        # self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
         self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
         self._samples = samples
         self.alpha = 1.-confidence_level
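The change from `1./alpha` to `2./alpha` above aligns `winkler_scores` with the standard interval (Winkler) score, W = (u - l) + (2/alpha) * ((l - y)_+ + (y - u)_+): the width of the interval plus a penalty of 2/alpha per unit by which the true value falls outside it. A standalone sketch of that core formula (without the optional `add_ae` term), with a quick numeric check:

```python
def winkler_score(low, high, true_val, alpha):
    # interval score: width plus a 2/alpha penalty per unit of miss
    amp = high - low
    miss = max(0.0, low - true_val) + max(0.0, true_val - high)
    return amp + (2.0 / alpha) * miss

# a 95% interval [0.10, 0.30] for a class prevalence:
print(winkler_score(0.10, 0.30, 0.25, alpha=0.05))  # covered: just the width, 0.2
print(winkler_score(0.10, 0.30, 0.35, alpha=0.05))  # 0.05 above: 0.2 + 40*0.05 = 2.2
```

Lower is better: the score rewards narrow intervals but makes misses expensive, which is why it is used above as a temperature-selection criterion.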
@@ -549,7 +559,99 @@ class ConfidenceEllipseILR(ConfidenceEllipseTransformed):



+class ConfidenceIntervalsTransformed(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Intervals region in a transformed space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param transformation: a CompositionalTransformation mapping the simplex onto the transformed space
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95, bonferroni_correction=False):
+        samples = np.asarray(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
+        self.mean_ = np.mean(samples, axis=0)
+        # self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.conf_region_z = ConfidenceIntervals(Z, confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, i.e., the mean of the samples in the original space.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        # The inverse transformation of the mean in the transformed space does not coincide with the
+        # true mean, because the geometric mean requires smoothing the prevalence vectors, and this
+        # affects the inverse (softmax);
+        # return self.transformation.inverse(self.mean_)  # <- does not coincide
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a set of values, is contained in the confidence region. If more than one
+        value is passed, the method computes the fraction of these that are contained in the region. If only
+        one value is passed, then it returns either 1.0 or 0.0, indicating whether or not the value is in the
+        region, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+    def coverage_soft(self, true_value):
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage_soft(transformed_values)
+
+    def winkler_scores(self, true_prev, alpha=None, add_ae=False):
+        transformed_values = self.transformation(true_prev)
+        return self.conf_region_z.winkler_scores(transformed_values, alpha=alpha, add_ae=add_ae)
+
+    def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
+        transformed_values = self.transformation(true_prev)
+        return self.conf_region_z.mean_winkler_score(transformed_values, alpha=alpha, add_ae=add_ae)
+
+
+class ConfidenceIntervalsCLR(ConfidenceIntervalsTransformed):
+    """
+    Instantiates Confidence Intervals in the Centered Log-Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+    def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
+
+
+class ConfidenceIntervalsILR(ConfidenceIntervalsTransformed):
+    """
+    Instantiates Confidence Intervals in the Isometric Log-Ratio (ILR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+    def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
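The idea behind these classes: compute ordinary per-dimension intervals, but in a log-ratio space where compositional data behaves more like unconstrained Euclidean data. A standalone sketch of the CLR forward map and interval construction; the patch delegates the transform to its own `CLRtransformation` class, so the smoothing constant `eps` below is an assumption made for illustration:

```python
import numpy as np

def clr(prevs, eps=1e-6):
    # centered log-ratio: log of each component over the geometric mean;
    # zeros must be smoothed away first, since log(0) is undefined
    p = np.asarray(prevs, dtype=float) + eps
    p = p / p.sum(axis=-1, keepdims=True)
    logp = np.log(p)
    return logp - logp.mean(axis=-1, keepdims=True)

# bootstrap-like prevalence samples for 3 classes
rng = np.random.default_rng(0)
samples = rng.dirichlet([50, 30, 20], size=1000)
Z = clr(samples)

# per-dimension 95% intervals in CLR space (alpha = 0.05)
low, high = np.percentile(Z, [2.5, 97.5], axis=0)
print(np.round(low, 3), np.round(high, 3))
```

Coverage checks and Winkler scores are then computed by mapping the true prevalence vector through the same transform, exactly as `ConfidenceIntervalsTransformed.coverage` does above.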
@@ -611,23 +713,35 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
-        data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
+
         self.quantifiers = []
         if self.n_train_samples==1:
             self.quantifier.aggregation_fit(classif_predictions, labels)
             self.quantifiers.append(self.quantifier)
         else:
-            # model-based bootstrap (only on the aggregative part)
-            n_examples = len(data)
-            full_index = np.arange(n_examples)
-            with qp.util.temp_seed(self.random_state):
-                for i in range(self.n_train_samples):
-                    quantifier = copy.deepcopy(self.quantifier)
-                    index = resample(full_index, n_samples=n_examples)
-                    classif_predictions_i = classif_predictions.sampling_from_index(index)
-                    data_i = data.sampling_from_index(index)
-                    quantifier.aggregation_fit(classif_predictions_i, data_i)
-                    self.quantifiers.append(quantifier)
+            if classif_predictions is None or labels is None:
+                # The entire dataset was consumed for classifier training, implying there is no need to train
+                # an aggregation function. If the bootstrap method was configured to train different aggregators
+                # (i.e., self.n_train_samples>1), then an error is raised. Otherwise, the method ends.
+                if self.n_train_samples > 1:
+                    raise ValueError(
+                        f'The underlying quantifier ({self.quantifier.__class__.__name__}) has consumed all the '
+                        f'training data, meaning the aggregation function needs none, but {self.n_train_samples=} '
+                        f'is > 1, which is inconsistent.'
+                    )
+            else:
+                # model-based bootstrap (only on the aggregative part)
+                data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
+                n_examples = len(data)
+                full_index = np.arange(n_examples)
+                with qp.util.temp_seed(self.random_state):
+                    for i in range(self.n_train_samples):
+                        quantifier = copy.deepcopy(self.quantifier)
+                        index = resample(full_index, n_samples=n_examples)
+                        classif_predictions_i = classif_predictions.sampling_from_index(index)
+                        data_i = data.sampling_from_index(index)
+                        quantifier.aggregation_fit(classif_predictions_i, data_i)
+                        self.quantifiers.append(quantifier)

         return self

     def aggregate(self, classif_predictions: np.ndarray):
@@ -653,8 +767,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         return prev_estim, conf

     def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
-        if confidence_level is None:
-            confidence_level = self.confidence_level
+        confidence_level = confidence_level or self.confidence_level
+
         n_samples = classif_predictions.shape[0]

         prevs = []
@@ -665,11 +779,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                 for i in range(self.n_test_samples)
             )
             prevs.extend(results)
-            # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
-            #     sample_i = resample(classif_predictions, n_samples=n_samples)
-            #     prev_i = quantifier.aggregate(sample_i)
-            #     prevs.append(prev_i)

+
         prevs = np.array(prevs)
         conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
         prev_estim = conf.point_estimate()
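The test-side bootstrap used by `aggregate_conf` boils down to: hold the classifier fixed, resample its predictions with replacement, re-run only the (cheap) aggregation step on each resample, and collect the resulting prevalence vectors as samples for a confidence region. A simplified sequential sketch of that loop (the patch parallelizes it with joblib; `aggregate_fn` and the fake posteriors below are illustrative stand-ins):

```python
import numpy as np
from sklearn.utils import resample

def bootstrap_aggregate(aggregate_fn, classif_predictions, n_test_samples=100, seed=0):
    # model-based bootstrap on the test side: resample the fixed classifier
    # predictions and re-aggregate each resample
    rng = np.random.RandomState(seed)
    n = classif_predictions.shape[0]
    prevs = []
    for _ in range(n_test_samples):
        sample_i = resample(classif_predictions, n_samples=n, random_state=rng)
        prevs.append(aggregate_fn(sample_i))
    # shape (n_test_samples, n_classes): ready to feed to a confidence region
    return np.asarray(prevs)

# usage sketch: aggregate_fn would be quantifier.aggregate for a fitted quantifier;
# here a PCC-style mean of (fake) posteriors stands in for it
fake_posteriors = np.random.default_rng(0).dirichlet([1, 1], size=500)
prevs = bootstrap_aggregate(lambda P: P.mean(axis=0), fake_posteriors)
print(prevs.mean(axis=0), prevs.std(axis=0))
```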
@@ -741,6 +852,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
+                 temperature: float = 1.,
                  prior = 'uniform'):

         if num_warmup <= 0:
@@ -761,6 +873,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
+        self.temperature = temperature
         self.prior = prior

     # Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
@@ -804,6 +917,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
             num_warmup=self.num_warmup,
             num_samples=self.num_samples,
             alpha=alpha,
+            temperature=self.temperature,
             seed=self.mcmc_seed,
         )
         return self._samples
diff --git a/result_table b/result_table
new file mode 160000
index 0000000..9d433e3
--- /dev/null
+++ b/result_table
@@ -0,0 +1 @@
+Subproject commit 9d433e3e35b4d111a3914a1e7d3257a8fcf24a9b
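For orientation, a minimal usage sketch of the `temperature` parameter added to `BayesianCC` above. It assumes the API as it appears in this patch (`fit(X, y)`, `predict_conf` returning a point estimate and a confidence region, `region.coverage`); the dataset choice and the `classifier` keyword are illustrative assumptions, not verified against the final interface:

```python
from sklearn.linear_model import LogisticRegression
import quapy as qp
from quapy.method.confidence import BayesianCC

# illustrative dataset; any multiclass quantification dataset would do
data = qp.datasets.fetch_UCIMulticlassDataset('dry-bean')
train, test = data.train_test

# temperature > 1 tempers the multinomial likelihoods, flattening the
# posterior over prevalences and hence widening the credible region
quant = BayesianCC(classifier=LogisticRegression(), temperature=2., region='intervals')
quant.fit(*train.Xy)

prev_estim, region = quant.predict_conf(test.X)
print(prev_estim)
print(region.coverage(test.prevalence()))  # 1.0 if the true prevalence is covered
```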