Compare commits


76 Commits
master ... ieee

Author SHA1 Message Date
Alejandro Moreo Fernandez 1661a79dbb quantification over time experiment 2025-12-10 19:43:08 +01:00
Alejandro Moreo Fernandez 5c2554861c new plots 2025-12-10 13:11:49 +01:00
Alejandro Moreo Fernandez 1eebfbc709 refactor folders 2025-12-10 13:10:03 +01:00
Alejandro Moreo Fernandez 8c063afd1e merged from devel 2025-12-10 12:06:03 +01:00
Alejandro Moreo Fernandez befe7503bd merged from devel 2025-12-10 12:05:10 +01:00
Alejandro Moreo Fernandez 3824a19e10 merged from cacm 2025-12-10 11:57:57 +01:00
Alejandro Moreo Fernandez b2e2794012 renaming branch 2025-12-10 11:52:32 +01:00
Alejandro Moreo Fernandez 89a8cad0b3 atichison distance and tests for evaluating regions 2025-12-09 18:12:06 +01:00
Alejandro Moreo Fernandez c492415977 adding bootstrap-emq 2025-12-08 12:40:44 +01:00
Alejandro Moreo Fernandez 7cb4bd550f adding bootstrap-emq 2025-12-08 12:31:23 +01:00
Alejandro Moreo Fernandez 7b75954f9b adding bootstrap-emq 2025-12-08 12:17:54 +01:00
Alejandro Moreo Fernandez 7342e57cda trying with emcee with no success... 2025-12-08 12:14:43 +01:00
Alejandro Moreo Fernandez b2750ab6ea added temperature, and coverage increases! 2025-12-06 14:06:14 +01:00
Alejandro Moreo Fernandez 602b89bd21 added temperature, and coverage increases! 2025-12-06 13:53:34 +01:00
Alejandro Moreo Fernandez 84c11d956b added temperature, and coverage increases! 2025-12-06 13:51:49 +01:00
Alejandro Moreo Fernandez 7c7af2cfaf added temperature, and coverage increases! 2025-12-06 13:50:15 +01:00
Alejandro Moreo Fernandez 2e0068b6ae import fix 2025-12-06 13:36:13 +01:00
Alejandro Moreo Fernandez d52fc40d2b ilr added 2025-12-06 13:20:11 +01:00
Alejandro Moreo Fernandez 59ef17c86c ilr only on multiclass 2025-12-04 20:10:25 +01:00
Alejandro Moreo Fernandez a6974b7624 adding experiment with ILR 2025-12-04 20:03:30 +01:00
Alejandro Moreo Fernandez e8d175106f adding experiment with ILR 2025-12-04 20:02:26 +01:00
Alejandro Moreo Fernandez b180aae16c added ILR to conf regions 2025-12-04 19:16:52 +01:00
Alejandro Moreo Fernandez d87625bd09 adding new config for bayesian 2025-12-04 18:27:15 +01:00
Alejandro Moreo Fernandez 90981088b0 launching PQ 2025-12-04 18:00:01 +01:00
Alejandro Moreo Fernandez 78fd05ab33 repairing safe check 2025-12-04 15:43:31 +01:00
Alejandro Moreo Fernandez 1080973d25 recomputing confidence regions 2025-12-04 15:42:00 +01:00
Alejandro Moreo Fernandez bfb6482410 launching kde** 2025-12-04 10:34:38 +01:00
Alejandro Moreo Fernandez 23608f2038 adding exploration in CLR 2025-12-04 10:24:02 +01:00
Alejandro Moreo Fernandez 881e1033f1 lauching experiments 2025-11-26 15:19:08 +01:00
Alejandro Moreo Fernandez a3f0008a2a Merge branch 'devel' into kdeplus 2025-11-18 10:13:56 +01:00
Alejandro Moreo Fernandez 6db659e3c4 unifying n_bins in PQ and DMy 2025-11-18 10:13:34 +01:00
Alejandro Moreo Fernandez 277a2e617f added aggregative bootstrap 2025-11-18 10:12:41 +01:00
Alejandro Moreo Fernandez be465712cd Merge branch 'devel' into kdeplus 2025-11-17 17:55:15 +01:00
Alejandro Moreo Fernandez db49cd31be Merge branch 'devel' of github.com:HLT-ISTI/QuaPy into devel 2025-11-17 17:54:09 +01:00
Alejandro Moreo Fernandez 9da4fd57db added property samples to confidence regions 2025-11-17 17:53:56 +01:00
Alejandro Moreo Fernandez fdc0560ccc no optim classifier 2025-11-17 17:47:14 +01:00
Alejandro Moreo Fernandez fd62e73d2d adding bootstrap variants 2025-11-17 12:37:12 +01:00
Alejandro Moreo Fernandez 12b431ef4b scripting experiments binary and multiclass 2025-11-17 12:22:40 +01:00
Alejandro Moreo Fernandez 2f83a520c7 drafting experiments 2025-11-16 12:42:26 +01:00
Alejandro Moreo Fernandez 4255098ba7 creating class BayesianKDEy 2025-11-15 21:47:30 +01:00
Alejandro Moreo Fernandez 8a8988d2de Merge branch 'kdeplus' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into kdeplus 2025-11-15 19:01:42 +01:00
Alejandro Moreo Fernandez f324ca049e Merge branch 'devel' into kdeplus 2025-11-15 18:57:56 +01:00
Alejandro Moreo Fernandez 5f6a151263 Merge branch 'pglez82-precisequant' into devel 2025-11-15 18:55:38 +01:00
Alejandro Moreo Fernandez 047cb9e533 merged 2025-11-15 18:54:04 +01:00
Alejandro Moreo Fernandez 6388d9b549 merged 2025-11-15 18:03:06 +01:00
Alejandro Moreo Fernandez d9cf6cc11d index in labelled collection from versions restored 2025-11-15 17:56:37 +01:00
pglez82 46e7246f3a adding stan file to setup 2025-11-15 17:04:55 +01:00
pglez82 c6492a0f20 fixing import 2025-11-15 17:04:44 +01:00
pglez82 e4c07e1835 changing the way the file is loaded 2025-11-15 16:51:48 +01:00
Alejandro Moreo Fernandez c2044cb200 Merge branch 'precisequant' of github.com:pglez82/QuaPy into pglez82-precisequant 2025-11-15 15:03:20 +01:00
pglez82 3268e9fada PQ (precise quantifier) 2025-11-14 18:35:40 +01:00
Alejandro Moreo Fernandez a868d2d561 import fix 2025-11-14 16:10:17 +01:00
Alejandro Moreo Fernandez ccb634fae5 step rate adaptation 2025-11-14 16:09:34 +01:00
Alejandro Moreo Fernandez 3dba708fe4 Merge branch 'devel' into kdeplus 2025-11-14 12:09:23 +01:00
Alejandro Moreo Fernandez 7ad5311fac merging from devel 2025-11-14 12:07:32 +01:00
Alejandro Moreo Fernandez 400edfdb63 bayesian plot 2025-11-13 19:43:07 +01:00
Alejandro Moreo Fernandez 3c09b1c98a adding prev@densities in KDEyML, huge effiency improvement... 2025-11-13 18:45:07 +01:00
Alejandro Moreo Fernandez 3e7a431d26 bayeisan kdey 2025-11-13 18:43:03 +01:00
Alejandro Moreo Fernandez f227ed2f60 adding kdex 2025-10-23 14:12:39 +02:00
Alejandro Moreo Fernandez 41baeb78ca lazy index construction in labelled collection 2025-10-23 12:35:01 +02:00
Alejandro Moreo Fernandez 088ebcdd31 adding non aggregative methods experimental 2025-10-23 12:10:21 +02:00
Alejandro Moreo Fernandez c11b99e08a improved ReadMe method 2025-10-22 18:51:35 +02:00
Alejandro Moreo Fernandez 854b3ba3f9 documented ReadMe 2025-10-20 18:33:45 +02:00
Alejandro Moreo Fernandez eafe486893 adding readme to non-aggregative 2025-10-20 18:13:34 +02:00
Alejandro Moreo Fernandez 1fb8500e87 improved doc 2025-10-09 12:49:08 +02:00
Alejandro Moreo Fernandez d597820a59 missing doc of confidence methods 2025-10-09 12:10:26 +02:00
Alejandro Moreo Fernandez 010676df12 starting devel 0.2.1 2025-10-07 10:27:59 +02:00
Alejandro Moreo Fernandez bd9f8e2cb4 show results fix 2025-09-30 12:02:13 +02:00
Alejandro Moreo Fernandez 79f3709e6f adding mrae 2025-09-27 18:12:56 +02:00
Alejandro Moreo Fernandez e4cb7868c7 adding mrae 2025-09-27 18:03:24 +02:00
Alejandro Moreo Fernandez 606ec2b89c with tables in pdf 2025-09-27 18:01:39 +02:00
Alejandro Moreo Fernandez 27afea8bf1 with tables in pdf 2025-09-27 18:01:21 +02:00
Alejandro Moreo Fernandez 29bb261f62 testing kde normal 2025-09-27 17:47:52 +02:00
Alejandro Moreo Fernandez c3fd92efde experiment 2025-09-27 17:41:12 +02:00
Alejandro Moreo Fernandez bb0950fad5 code used to generate plots 2023-11-13 12:07:59 +01:00
Alejandro Moreo Fernandez 2e992a0b9a choosing plots for paper 2023-11-10 14:22:43 +01:00
40 changed files with 3613 additions and 210 deletions

7
BayesianKDEy/TODO.txt Normal file
View File

@ -0,0 +1,7 @@
- Add other methods that natively provide uncertainty quantification?
- MPIW (Mean Prediction Interval Width): the average of the interval amplitudes (without aggregating coverage in any way)
- Implement the Interval Score or Winkler Score
- analyze across shift
- add Bayesian EM
- also optimize C and class_weight?
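A minimal sketch of the two interval metrics mentioned above (MPIW and the Winkler/interval score), assuming per-class intervals are given as arrays of lower and upper bounds; the function names are illustrative and not part of QuaPy:

import numpy as np

def mpiw(lower, upper):
    """Mean Prediction Interval Width: average amplitude of the intervals."""
    return np.mean(np.asarray(upper) - np.asarray(lower))

def winkler_score(lower, upper, y_true, alpha=0.05):
    """Winkler (interval) score of central (1-alpha) intervals: the interval width plus a
    penalty of (2/alpha) times the amount by which the true value falls outside the interval."""
    lower, upper, y_true = map(np.asarray, (lower, upper, y_true))
    width = upper - lower
    below = (2. / alpha) * (lower - y_true) * (y_true < lower)
    above = (2. / alpha) * (y_true - upper) * (y_true > upper)
    return np.mean(width + below + above)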

View File

@ -0,0 +1,215 @@
from sklearn.base import BaseEstimator
import numpy as np
import quapy.util
from quapy.method._kdey import KDEBase
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
from quapy.functional import CLRtransformation, ILRtransformation
from quapy.method.aggregative import AggregativeSoftQuantifier
from tqdm import tqdm
import quapy.functional as F
import emcee  # required by _bayesian_emcee, which is called from aggregate
class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
"""
Bayesian version of KDEy.
:param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken to be
the one indicated in `qp.environ['DEFAULT_CLS']`
:param val_split: specifies the data used for generating classifier predictions. This specification
can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
be extracted from the training set; or as an integer (default 5), indicating that the predictions
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
for `k`); or as a tuple `(X,y)` defining the specific set of data to use for validation. Set to
None when the method does not require any validation data, in order to avoid that some portion of
the training data be wasted.
:param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
:param num_samples: number of samples to draw from the posterior (default 1000)
:param mcmc_seed: random seed for the MCMC sampler (default 0)
:param confidence_level: float in [0,1] to construct a confidence region around the point estimate (default 0.95)
:param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered Log-Ratio (CLR) unconstrained space.
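:param kernel: the kernel function used by the KDE (default 'gaussian')
:param bandwidth: the bandwidth of the kernel (default 0.1)
:param explore: string in {'simplex', 'clr', 'ilr'}; space in which the random-walk proposals of the
Metropolis-Hastings sampler are generated (default 'simplex')
:param step_size: float, scale of the random-walk proposal (default 0.05)
:param temperature: float > 0, tempering factor for the likelihood; the log-likelihood is multiplied
by 1/temperature (default 1.)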
:param verbose: bool, whether to display progress bar
"""
def __init__(self,
classifier: BaseEstimator=None,
fit_classifier=True,
val_split: int = 5,
kernel='gaussian',
bandwidth=0.1,
num_warmup: int = 500,
num_samples: int = 1_000,
mcmc_seed: int = 0,
confidence_level: float = 0.95,
region: str = 'intervals',
explore='simplex',
step_size=0.05,
temperature=1.,
verbose: bool = False):
if num_warmup <= 0:
raise ValueError(f'parameter {num_warmup=} must be a positive integer')
if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer')
assert explore in ['simplex', 'clr', 'ilr'], \
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
assert temperature>0., f'temperature must be >0'
super().__init__(classifier, fit_classifier, val_split)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
self.kernel = self._check_kernel(kernel)
self.num_warmup = num_warmup
self.num_samples = num_samples
self.mcmc_seed = mcmc_seed
self.confidence_level = confidence_level
self.region = region
self.explore = explore
self.step_size = step_size
self.temperature = temperature
self.verbose = verbose
def aggregation_fit(self, classif_predictions, labels):
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
return self
def aggregate(self, classif_predictions):
# self.prevalence_samples = self._bayesian_kde(classif_predictions, init=None, verbose=self.verbose)
self.prevalence_samples = self._bayesian_emcee(classif_predictions)
return self.prevalence_samples.mean(axis=0)
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
if confidence_level is None:
confidence_level = self.confidence_level
classif_predictions = self.classify(instances)
point_estimate = self.aggregate(classif_predictions)
samples = self.prevalence_samples # available after calling "aggregate" function
region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
return point_estimate, region
def _bayesian_kde(self, X_probs, init=None, verbose=False):
"""
Bayes:
P(prev|data) = P(data|prev) P(prev) / P(data)
i.e.,
posterior = likelihood * prior / evidence
we assume the likelihood to be the product, over test points x_j, of the mixture density:
prod_j ( prev @ [kde_i(x_j) for i=1..n] )
and the prior to be uniform on the simplex
"""
rng = np.random.default_rng(self.mcmc_seed)
kdes = self.mix_densities
test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
def log_likelihood(prev, epsilon=1e-10):
test_likelihoods = prev @ test_densities
test_loglikelihood = np.log(test_likelihoods + epsilon)
return (1./self.temperature) * np.sum(test_loglikelihood)
# def log_prior(prev):
# todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
# return 1/np.sum((prev-init)**2) # it is not 1 but we assume uniform, so anyway it is a useless constant
# def log_prior(prev, alpha_scale=1000):
# alpha = np.array(init) * alpha_scale
# return dirichlet.logpdf(prev, alpha)
def log_prior(prev):
return 0
def sample_neighbour(prev, step_size):
# random-walk Metropolis-Hastings
d = len(prev)
neighbour = None
if self.explore=='simplex':
dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
elif self.explore=='clr':
clr = CLRtransformation()
clr_point = clr(prev)
dir_noise = rng.normal(scale=step_size, size=d)
clr_neighbour = clr_point+dir_noise
neighbour = clr.inverse(clr_neighbour)
assert in_simplex(neighbour), 'wrong CLR transformation'
elif self.explore=='ilr':
ilr = ILRtransformation()
ilr_point = ilr(prev)
dir_noise = rng.normal(scale=step_size, size=d-1)
ilr_neighbour = ilr_point + dir_noise
neighbour = ilr.inverse(ilr_neighbour)
assert in_simplex(neighbour), 'wrong ILR transformation'
return neighbour
n_classes = X_probs.shape[1]
current_prev = F.uniform_prevalence(n_classes) if init is None else init
current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
# Metropolis-Hastings with adaptive rate
step_size = self.step_size
target_acceptance = 0.3
adapt_rate = 0.05
acceptance_history = []
samples = []
total_steps = self.num_samples + self.num_warmup
for i in tqdm(range(total_steps), total=total_steps, disable=not verbose):
proposed_prev = sample_neighbour(current_prev, step_size)
# probability of acceptance
proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
acceptance = proposed_likelihood - current_likelihood
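# for a symmetric random-walk proposal, Metropolis-Hastings accepts with probability
# min(1, exp(log_post(proposed) - log_post(current))); drawing u~U(0,1) and testing
# log(u) < acceptance (below) is an equivalent check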
# decide acceptance
accepted = np.log(rng.random()) < acceptance
if accepted:
current_prev = proposed_prev
current_likelihood = proposed_likelihood
samples.append(current_prev)
acceptance_history.append(1. if accepted else 0.)
# if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
if i % 10 == 0 and len(acceptance_history) >= 100:
recent_accept_rate = np.mean(acceptance_history[-100:])
step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
# step_size = float(np.clip(step_size, min_step, max_step))
if i %100==0:
print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
# remove "warmup" initial iterations
samples = np.asarray(samples[self.num_warmup:])
return samples
def _bayesian_emcee(self, X_probs):
ndim = X_probs.shape[1]
nwalkers = 32
f = CLRtransformation()
def log_likelihood(unconstrained, test_densities, epsilon=1e-10):
prev = f.inverse(unconstrained)
test_likelihoods = prev @ test_densities
test_loglikelihood = np.log(test_likelihoods + epsilon)
return np.sum(test_loglikelihood)
kdes = self.mix_densities
test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
# p0 = np.random.normal(nwalkers, ndim)
p0 = F.uniform_prevalence_sampling(ndim, nwalkers)
p0 = f(p0)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_likelihood, args=[test_densities])
state = sampler.run_mcmc(p0, self.num_warmup, skip_initial_state_check=True)
sampler.reset()
sampler.run_mcmc(state, self.num_samples, skip_initial_state_check=True)
samples = sampler.get_chain(flat=True)
samples = f.inverse(samples)
return samples
def in_simplex(x):
return np.all(x >= 0) and np.isclose(x.sum(), 1)
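For reference, a minimal self-contained sketch of the CLR map and of the Aitchison distance it induces, following the standard compositional-data definitions (illustrative only; QuaPy's CLRtransformation and dist_aitchison may differ in details such as zero handling):

import numpy as np

def clr(p, eps=1e-12):
    """Centered log-ratio: log(p_i / g(p)), with g(p) the geometric mean of the composition."""
    p = np.clip(np.asarray(p, dtype=float), eps, None)
    logp = np.log(p)
    return logp - logp.mean(axis=-1, keepdims=True)

def clr_inverse(z):
    """Softmax-like inverse: exponentiate and renormalize back onto the simplex."""
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def dist_aitchison(p, q):
    """Aitchison distance: Euclidean distance between the CLR-transformed compositions."""
    return np.linalg.norm(clr(p) - clr(q), axis=-1)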

View File

@ -0,0 +1,196 @@
import os
import warnings
from os.path import join
from pathlib import Path
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
from quapy.method.base import BinaryQuantifier, BaseQuantifier
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
from collections import defaultdict
from time import time
from sklearn.base import clone, BaseEstimator
class KDEyCLR(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='aitchison'
)
class KDEyILR(KDEyML):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
super().__init__(
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='ilr'
)
def methods():
"""
Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor, scope), where:
- name: a str with the name of the method (e.g., 'BayesianKDEy')
- quantifier: the base model (e.g., KDEyML())
- hyperparams: a dictionary with the hyperparameter grid for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
- bayesian/bootstrap_constructor: a function that instantiates the bayesian or bootstrap method wrapping the
quantifier with its optimized hyperparameters
- scope: one of 'multiclass', 'only_binary', 'only_multiclass', restricting the datasets the method is run on
"""
acc_hyper = {}
emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
multiclass_method = 'multiclass'
only_binary = 'only_binary'
only_multiclass = 'only_multiclass'
# yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
# yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
# yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
#
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
with qp.util.temp_seed(0):
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
# model selection
if len(grid)>0:
train, val = train.split_stratified(train_prop=0.6, random_state=0)
mod_sel = GridSearchQ(
model=point_quantifier,
param_grid=grid,
protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
refit=False,
n_jobs=-1,
verbose=True
).fit(*train.Xy)
best_params = mod_sel.best_params_
else:
best_params = {}
return best_params
def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path):
with qp.util.temp_seed(0):
training, test = dataset.train_test
# model selection
best_hyperparams = qp.util.pickled_resource(
hyper_choice_path, model_selection, training, cp(point_quantifier), grid
)
t_init = time()
withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
tr_time = time() - t_init
# test
train_prevalence = training.prevalence()
results = defaultdict(list)
test_generator = UPP(test, repeats=100, random_state=0)
for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
t_init = time()
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
ttime = time()-t_init
results['true-prevs'].append(true_prevalence)
results['point-estim'].append(point_estimate)
results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['coverage'].append(region.coverage(true_prevalence))
results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
results['test-time'].append(ttime)
results['samples'].append(region.samples)
report = {
'optim_hyper': best_hyperparams,
'train_time': tr_time,
'train-prev': train_prevalence,
'results': {k:np.asarray(v) for k,v in results.items()}
}
return report
def experiment_path(dir:Path, dataset_name:str, method_name:str):
os.makedirs(dir, exist_ok=True)
return dir/f'{dataset_name}__{method_name}.pkl'
if __name__ == '__main__':
binary = {
'datasets': qp.datasets.UCI_BINARY_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
'sample_size': 500
}
multiclass = {
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
'sample_size': 1000
}
result_dir = Path('./results')
for setup in [binary, multiclass]: # [binary, multiclass]:
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
for data_name in setup['datasets']:
print(f'dataset={data_name}')
# if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
# print(f'skipping dataset: {data_name}')
# continue
data = setup['fetch_fn'](data_name)
is_binary = data.n_classes==2
result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
for method_name, method, hyper_params, withconf_constructor, method_scope in methods():
if method_scope == 'only_binary' and not is_binary:
continue
if method_scope == 'only_multiclass' and is_binary:
continue
result_path = experiment_path(result_subdir, data_name, method_name)
hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
report = qp.util.pickled_resource(
result_path, experiment, data, method, method_name, hyper_params, withconf_constructor, hyper_path
)
print(f'dataset={data_name}, '
f'method={method_name}: '
f'mae={report["results"]["ae"].mean():.3f}, '
f'coverage={report["results"]["coverage"].mean():.5f}, '
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')

View File

@ -0,0 +1,170 @@
import pickle
from collections import defaultdict
from joblib import Parallel, delayed
from tqdm import tqdm
import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)
def region_score(true_prev, region: ConfidenceRegionABC):
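# region score (derived from the code below): the region's amplitude plus, when the true prevalence is not
# covered, a penalty of (1/alpha) times the Aitchison distance from the true prevalence to the closest
# point of the region; an interval-score-like trade-off between width and coverage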
amp = region.montecarlo_proportion(50_000)
if true_prev in region:
cost = 0
else:
scale_cost = 1/region.alpha
cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
return amp + cost
def compute_coverage_amplitude(region_constructor, **kwargs):
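# note: 'results' is not a parameter here; it is the module-level dict assigned in the processing loop below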
all_samples = results['samples']
all_true_prevs = results['true-prevs']
def process_one(samples, true_prevs):
region = region_constructor(samples, **kwargs)
if isinstance(region, ConfidenceIntervals):
winkler = region.mean_winkler_score(true_prevs)
else:
winkler = None
return region.coverage(true_prevs), region.montecarlo_proportion(), winkler
out = Parallel(n_jobs=3)(
delayed(process_one)(samples, true_prevs)
for samples, true_prevs in tqdm(
zip(all_samples, all_true_prevs),
total=len(all_samples),
desc='constructing ellipses'
)
)
# unzip results
coverage, amplitude, winkler = zip(*out)
return list(coverage), list(amplitude), list(winkler)
def update_pickle(report, pickle_path, updated_dict:dict):
for k,v in updated_dict.items():
report[k]=v
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
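# lazily augments a cached report: computes coverage/amplitude/winkler for the given region type only if
# not already stored, and writes the updated report back to its pickle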
if f'coverage-{conf_name}' not in report:
covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
update_fields = {
f'coverage-{conf_name}': covs,
f'amplitude-{conf_name}': amps,
f'winkler-{conf_name}': winkler
}
update_pickle(report, file, update_fields)
methods = None # show all methods
# methods = ['BayesianACC', 'BayesianKDEy']
for setup in ['multiclass']:
path = f'./results/{setup}/*.pkl'
table = defaultdict(list)
for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
file = Path(file)
dataset, method = file.name.replace('.pkl', '').split('__')
if methods is not None and method not in methods:
continue
report = pickle.load(open(file, 'rb'))
results = report['results']
n_samples = len(results['ae'])
table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
table['dataset'].extend([dataset] * n_samples)
table['ae'].extend(results['ae'])
table['rae'].extend(results['rae'])
# table['c-CI'].extend(results['coverage'])
# table['a-CI'].extend(results['amplitude'])
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
table['c-CI'].extend(report['coverage-CI'])
table['a-CI'].extend(report['amplitude-CI'])
table['w-CI'].extend(report['winkler-CI'])
table['c-CE'].extend(report['coverage-CE'])
table['a-CE'].extend(report['amplitude-CE'])
table['c-CLR'].extend(report['coverage-CLR'])
table['a-CLR'].extend(report['amplitude-CLR'])
table['c-ILR'].extend(report['coverage-ILR'])
table['a-ILR'].extend(report['amplitude-ILR'])
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
# table['aitch'].extend()
table['reg-score-ILR'].extend(
[region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
)
df = pd.DataFrame(table)
n_classes = {}
tr_size = {}
for dataset in df['dataset'].unique():
fetch_fn = {
'binary': qp.datasets.fetch_UCIBinaryDataset,
'multiclass': qp.datasets.fetch_UCIMulticlassDataset
}[setup]
data = fetch_fn(dataset)
n_classes[dataset] = data.n_classes
tr_size[dataset] = len(data.training)
# remove datasets with more than max_classes classes
# max_classes = 30
# min_train = 1000
# for data_name, n in n_classes.items():
# if n > max_classes:
# df = df[df["dataset"] != data_name]
# for data_name, n in tr_size.items():
# if n < min_train:
# df = df[df["dataset"] != data_name]
for region in ['ILR']: # , 'CI', 'CE', 'CLR', 'ILR']:
if setup == 'binary' and region=='ILR':
continue
# pv = pd.pivot_table(
# df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
# )
pv = pd.pivot_table(
df, index='dataset', columns='method', values=[
#f'w-{region}',
# 'ae',
# 'rae',
# f'aitch',
# f'aitch-well'
'reg-score-ILR',
], margins=True
)
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
print(f'{setup=}')
print(pv)
print('-'*80)

View File

@ -0,0 +1,258 @@
import os
import pickle
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from scipy.stats import gaussian_kde
from method.confidence import (ConfidenceIntervals as CI,
ConfidenceEllipseSimplex as CE,
ConfidenceEllipseCLR as CLR,
ConfidenceEllipseILR as ILR)
def get_region_colormap(name="blue", alpha=0.40):
name = name.lower()
if name == "blue":
base = (76/255, 114/255, 176/255)
elif name == "orange":
base = (221/255, 132/255, 82/255)
elif name == "violet":
base = (129/255, 114/255, 178/255)
else:
raise ValueError(f"Unknown palette name: {name}")
cmap = ListedColormap([
(1, 1, 1, 0), # 0: transparent white
(base[0], base[1], base[2], alpha) # 1: color
])
return cmap
def plot_prev_points(samples=None,
show_samples=True,
true_prev=None,
point_estim=None, train_prev=None, show_mean=True, show_legend=True,
region=None,
region_resolution=1000,
confine_region_in_simplex=False,
color='blue',
save_path=None):
plt.rcParams.update({
'font.size': 10,  # base size for all text
'axes.titlesize': 12,  # axes title
'axes.labelsize': 10,  # axes labels
'xtick.labelsize': 8,  # tick labels
'ytick.labelsize': 8,
'legend.fontsize': 9,  # legend
})
def cartesian(p):
dim = p.shape[-1]
p = p.reshape(-1,dim)
x = p[:, 1] + p[:, 2] * 0.5
y = p[:, 2] * np.sqrt(3) / 2
return x, y
def barycentric_from_xy(x, y):
"""
Given cartesian coordinates (x, y) in the simplex, returns barycentric coordinates (p1, p2, p3).
"""
p3 = 2 * y / np.sqrt(3)
p2 = x - 0.5 * p3
p1 = 1 - p2 - p3
return np.stack([p1, p2, p3], axis=-1)
# simplex coordinates
v1 = np.array([0, 0])
v2 = np.array([1, 0])
v3 = np.array([0.5, np.sqrt(3)/2])
# Plot
fig, ax = plt.subplots(figsize=(6, 6))
if region is not None:
if callable(region):
region_list = [("region", region)]
else:
region_list = region  # list of (name, fn) pairs
if region is not None:
# rectangular mesh
x_min, x_max = -0.2, 1.2
y_min, y_max = -0.2, np.sqrt(3) / 2 + 0.2
xs = np.linspace(x_min, x_max, region_resolution)
ys = np.linspace(y_min, y_max, region_resolution)
grid_x, grid_y = np.meshgrid(xs, ys)
# barycentric
pts_bary = barycentric_from_xy(grid_x, grid_y)
# mask within simplex
if confine_region_in_simplex:
in_simplex = np.all(pts_bary >= 0, axis=-1)
else:
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
# --- Colormap: 0 → transparent white, 1 → semi-transparent region color ---
# iterate over all regions
for (rname, rfun) in region_list:
mask = np.zeros_like(in_simplex, dtype=float)
valid_pts = pts_bary[in_simplex]
mask_vals = np.array([float(rfun(p)) for p in valid_pts])
mask[in_simplex] = mask_vals
ax.pcolormesh(
xs, ys, mask,
shading='auto',
cmap=get_region_colormap(color),
alpha=0.3,
)
if samples is not None:
if show_samples:
ax.scatter(*cartesian(samples), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
if show_mean is not None and show_mean is not False:
if isinstance(show_mean, np.ndarray):
ax.scatter(*cartesian(show_mean), s=10, alpha=1, label='sample-mean', edgecolors='black')
elif show_mean==True and samples is not None:
ax.scatter(*cartesian(samples.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
else:
raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or '
f'the mean point itself')
if train_prev is not None:
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
if point_estim is not None:
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
if train_prev is not None:
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
# edges
triangle = np.array([v1, v2, v3, v1])
ax.plot(triangle[:, 0], triangle[:, 1], color='black')
# vertex labels
ax.text(-0.05, -0.05, "Y=1", ha='right', va='top')
ax.text(1.05, -0.05, "Y=2", ha='left', va='top')
ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom')
ax.set_aspect('equal')
ax.axis('off')
if show_legend:
plt.legend(
loc='center left',
bbox_to_anchor=(1.05, 0.5),
)
plt.tight_layout()
if save_path is None:
plt.show()
else:
os.makedirs(Path(save_path).parent, exist_ok=True)
plt.savefig(save_path)
def plot_prev_points_matplot(points):
# project 2D
v1 = np.array([0, 0])
v2 = np.array([1, 0])
v3 = np.array([0.5, np.sqrt(3) / 2])
x = points[:, 1] + points[:, 2] * 0.5
y = points[:, 2] * np.sqrt(3) / 2
# kde
xy = np.vstack([x, y])
kde = gaussian_kde(xy, bw_method=0.25)
xmin, xmax = 0, 1
ymin, ymax = 0, np.sqrt(3) / 2
# grid
xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
positions = np.vstack([xx.ravel(), yy.ravel()])
zz = np.reshape(kde(positions).T, xx.shape)
# mask points in simplex
def in_triangle(x, y):
return (y >= 0) & (y <= np.sqrt(3) * np.minimum(x, 1 - x))
mask = in_triangle(xx, yy)
zz_masked = np.ma.array(zz, mask=~mask)
# plot
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(
np.rot90(zz_masked),
cmap=plt.cm.viridis,
extent=[xmin, xmax, ymin, ymax],
alpha=0.8,
)
# triangle edges
triangle = np.array([v1, v2, v3, v1])
ax.plot(triangle[:, 0], triangle[:, 1], color='black', lw=2)
# points (optional)
ax.scatter(x, y, s=5, c='white', alpha=0.3)
# labels
ax.text(-0.05, -0.05, "A (1,0,0)", ha='right', va='top')
ax.text(1.05, -0.05, "B (0,1,0)", ha='left', va='top')
ax.text(0.5, np.sqrt(3) / 2 + 0.05, "C (0,0,1)", ha='center', va='bottom')
ax.set_aspect('equal')
ax.axis('off')
plt.show()
if __name__ == '__main__':
np.random.seed(1)
n = 1000
# alpha = [3,5,10]
alpha = [10,1,1]
prevs = np.random.dirichlet(alpha, size=n)
def regions():
confs = [0.99, 0.95, 0.90]
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
# resolution = 1000
# alpha_str = ','.join([f'{str(i)}' for i in alpha])
# for crname, cr in regions():
# plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
# color='blue',
# save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
# )
def regions():
confs = [0.99, 0.95, 0.90]
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
resolution = 1000
alpha_str = ','.join([f'{str(i)}' for i in alpha])
region = ILR(prevs, confidence_level=.99)
p = np.asarray([0.1, 0.8, 0.1])
plot_prev_points(prevs, show_samples=False,
show_mean=region.mean_,
# show_mean=prevs.mean(axis=0),
show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
color='blue',
true_prev=p,
train_prev=region.closest_point_in_region(p),
save_path=f'./plots3/simplex_ilr.png',
)

View File

@ -0,0 +1,56 @@
import warnings
from sklearn.linear_model import LogisticRegression
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from method.confidence import ConfidenceIntervals
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
if __name__ == '__main__':
qp.environ["SAMPLE_SIZE"] = 500
cls = LogisticRegression()
bayes_kdey = BayesianKDEy(cls, bandwidth=.3, kernel='aitchison', mcmc_seed=0)
datasets = qp.datasets.UCI_BINARY_DATASETS
train, test = qp.datasets.fetch_UCIBinaryDataset(datasets[0]).train_test
# train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
with qp.util.temp_seed(0):
print('fitting KDEy')
bayes_kdey.fit(*train.Xy)
shifted = test.sampling(500, *[0.2, 0.8])
# shifted = test.sampling(500, *test.prevalence()[::-1])
# shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
prev_hat = bayes_kdey.predict(shifted.X)
mae = qp.error.mae(shifted.prevalence(), prev_hat)
print(f'true_prev={strprev(shifted.prevalence())}')
print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
mae = qp.error.mae(shifted.prevalence(), prev_hat)
print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
print(f'CI={conf_interval}')
print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')
if train.n_classes == 3:
plot_prev_points(bayes_kdey.prevalence_samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
# plot_prev_points_matplot(samples)
# report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
# print(report.mean(numeric_only=True))

View File

@ -0,0 +1,91 @@
import os
import warnings
from os.path import join
from pathlib import Path
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
from quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
from quapy.method.base import BinaryQuantifier, BaseQuantifier
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
from quapy.functional import strprev
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
import quapy.functional as F
import numpy as np
from tqdm import tqdm
from scipy.stats import dirichlet
from collections import defaultdict
from time import time
from sklearn.base import clone, BaseEstimator
def methods():
"""
Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
- name: a str with the name of the method (e.g., 'BayesianKDEy')
- quantifier: the base model (e.g., KDEyML())
- hyperparams: a dictionary with the hyperparameter grid for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
- bayesian/bootstrap_constructor: a function that instantiates the bayesian or bootstrap method wrapping the
quantifier with its optimized hyperparameters
"""
acc_hyper = {}
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
# for T in [10, 20, 50, 100., 500]:
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
if __name__ == '__main__':
binary = {
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
'sample_size': 500
}
multiclass = {
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
'sample_size': 1000
}
setup = multiclass
# data_name = 'isolet'
# data_name = 'cmc'
data_name = 'abalone'
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
print(f'dataset={data_name}')
data = setup['fetch_fn'](data_name)
is_binary = data.n_classes==2
hyper_subdir = Path('./results') / 'hyperparams' / ('binary' if is_binary else 'multiclass')
for method_name, method, hyper_params, withconf_constructor in methods():
hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
print(f'dataset={data_name}, '
f'method={method_name}: '
f'mae={report["results"]["ae"].mean():.3f}, '
f'coverage={report["results"]["coverage"].mean():.5f}, '
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')

View File

@ -1,7 +1,16 @@
Change Log 0.2.0
Change Log 0.2.1
-----------------
CLEAN TODO-FILE
- Added squared ratio error.
- Improved efficiency of confidence regions coverage functions
- Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy)
- Improved documentation of confidence regions.
- Added ReadMe method by Daniel Hopkins and Gary King
- Internal index in LabelledCollection is now "lazy", and is only constructed if required.
- Added dist_aitchison and mean_dist_aitchison as new error evaluation metrics
Change Log 0.2.0
-----------------
- Base code Refactor:
- Removing coupling between LabelledCollection and quantification methods; the fit interface changes:

View File

@ -0,0 +1,300 @@
import os
from collections import defaultdict
from typing import List, Dict
import matplotlib.pyplot as plt
import kagglehub
import pandas as pd
from pathlib import Path
import numpy as np
from qunfold import KMM
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression as LR, LogisticRegressionCV
from tqdm import tqdm
import quapy as qp
from data import LabelledCollection, Dataset
import quapy.functional as F
from method.composable import QUnfoldWrapper
from quapy.method.aggregative import DistributionMatchingY, EMQ, KDEyML
from quapy.method.non_aggregative import DistributionMatchingX
from quapy.method.aggregative import CC, ACC, HDy
from transformers import pipeline
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)
def prepare_xy_date_blocks(df, freq="M"):
"""
df: DataFrame with columns 'text', 'airline_sentiment', 'tweet_created'
freq: frequency of the temporal blocks ('D', 'W', 'M', etc.)
Returns:
X: list of texts
y: np.ndarray of labels
date: array of integer indices identifying the temporal block of each row
idx2date: list with the temporal limits (start, end) of each block, as tuples
"""
df["tweet_created"] = pd.to_datetime(df["tweet_created"], errors="coerce")
df = df.sort_values("tweet_created").reset_index(drop=True)
X = df["text"].astype(str).values
y = df["airline_sentiment"].values
# group dates by requested frequency
date_groups = df["tweet_created"].dt.to_period(freq)
# assigns index to date blocks
unique_periods = date_groups.unique()
period_to_idx = {p: i for i, p in enumerate(unique_periods)}
date = np.asarray([period_to_idx[p] for p in date_groups])
# get true limits of period intervals
idx2date = []
for p in unique_periods:
start = p.start_time
end = p.end_time
idx2date.append((start, end))
return X, y, date, idx2date
def prepare_labelled_collections():
# loads and prepares the Twitter US Airline Sentiment dataset (from Kaggle)
# returns a labelled collection for the training data (days 0 and 1), and a list of the
# test sets (days 2 to 8) together with the time limits of each test period
# The dataset is originally ternary (negative, neutral, positive), but we binarize it by discarding the neutral class
# Download latest version
path = kagglehub.dataset_download("crowdflower/twitter-airline-sentiment")
df = pd.read_csv(Path(path) / 'Tweets.csv')
X, y, date, idx2date = prepare_xy_date_blocks(df, freq="D")
# binarize
keep_idx = (y!='neutral')
X = X[keep_idx]
y = y[keep_idx]
date = date[keep_idx]
y[y != 'negative'] = 1
y[y == 'negative'] = 0
y = y.astype(int)
# use days 0 and 1 for training, the rest for test
X_train, y_train = X[date<=1], y[date<=1]
train = LabelledCollection(X_train, y_train)
print(f'training has {len(train)} docs and prevalence={F.strprev(train.prevalence())} classes={train.classes}')
tests = []
test_init = []
for date_i in range(2, max(date)+1):
X_test_i, y_test_i = X[date==date_i], y[date==date_i]
test_i = LabelledCollection(X_test_i, y_test_i, classes=train.classes)
print(f'test-{date_i} has {len(test_i)} docs and prevalence={F.strprev(test_i.prevalence())}')
tests.append(test_i)
test_init.append(idx2date[date_i])
return train, tests, test_init
from scipy.interpolate import CubicSpline
import numpy as np
import matplotlib.pyplot as plt
def smooth_curve(dates, values, num_points=300):
"""
dates: list of timestamps
values: list of Y-values
num_points: number of points in the smooth curve
Returns new_x, new_y for plotting a smooth line.
"""
# Convert datetime to numeric (matplotlib float representation)
x = [d.timestamp() for d in dates]
x = np.array(x)
y = np.array(values)
# Create new X-axis with more points
x_new = np.linspace(x.min(), x.max(), num_points)
# Smooth spline
spline = CubicSpline(x, y)
y_new = spline(x_new)
# Convert numeric x_new back to datetime
dates_new = [pd.to_datetime(t, unit='s') for t in x_new]
return dates_new, y_new
def plot_prevalences(results_dict, target_class=1, target_label='positive', savepath=None):
"""
Plot prevalence estimates over time for each method contained in results_dict.
Parameters
----------
results_dict : dict
A dictionary where:
- "date-start" : list of datetime-like objects
- all other keys : list of prevalence vectors (arrays), e.g. [p_neg, p_pos]
Only the component indexed by `target_class` is plotted (as a percentage).
"""
dates = results_dict["date-start"]
# Create figure
plt.figure(figsize=(20, 10))
# Plot one line per method (except "date-start")
for method, values in results_dict.items():
if method == "date-start":
continue
# Extract first component from each prevalence array
target_component = [v[target_class]*100 for v in values]
dates_smooth, y_smooth = smooth_curve(dates, target_component)
if method=='true-prev':
line,=plt.plot(dates_smooth, y_smooth, label=method, linewidth=3, linestyle='-')
else:
line,=plt.plot(dates_smooth, y_smooth, label=method, linewidth=2, linestyle='--')
plt.plot(dates, target_component, 'o', markersize=10, color=line.get_color())
# Axis labels
# plt.xlabel("Date")
plt.ylabel("% of "+target_label+" tweets")
# Rotate date labels for readability
plt.xticks(rotation=45)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5)
plt.grid(which='minor', linestyle=':', linewidth=0.3)
# Place the legend outside to the right
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.tight_layout()
if savepath is not None:
os.makedirs(Path(savepath).parent, exist_ok=True)
plt.savefig(savepath)
else:
plt.show()
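# the wrappers below reduce the sparse TF-IDF input to 5 TruncatedSVD components before fitting/predicting,
# since HDx and KMM work directly on the feature space (rather than on classifier outputs) and expect
# dense, low-dimensional inputs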
class HDxDensify(DistributionMatchingX):
def fit(self, X, y):
self.reductor = TruncatedSVD(n_components=5, random_state=0)
Xred = self.reductor.fit_transform(X)
return super().fit(Xred, y)
def predict(self, X):
Xred = self.reductor.transform(X)
return super().predict(Xred)
class QUnfoldWrapperDensify(QUnfoldWrapper):
def fit(self, X, y):
self.reductor = TruncatedSVD(n_components=5, random_state=0)
Xred = self.reductor.fit_transform(X)
return super().fit(Xred, y)
def predict(self, X):
Xred = self.reductor.transform(X)
return super().predict(Xred)
# A scikit-learn-style wrapper for a huggingface-based pre-trained transformer for binary sentiment classification
class HFTextClassifier(BaseEstimator, ClassifierMixin):
def __init__(self, model_name='distilbert-base-uncased-finetuned-sst-2-english'):
self.pipe = pipeline("sentiment-analysis", model=model_name)
self.classes_ = np.asarray([0,1])
def fit(self, X, y=None):
return self
def _binary_decisions(self, transformer_output: List[Dict]):
return np.array([(1 if p['label']=='POSITIVE' else 0) for p in transformer_output], dtype=int)
def predict(self, X):
X = list(map(str, X))
preds = self.pipe(X, truncation=True)
return self._binary_decisions(preds)
def predict_proba(self, X):
X = list(map(str, X))
n_examples = len(X)
preds = self.pipe(X, truncation=True)
decisions = self._binary_decisions(preds)
scores = np.array([p['score'] for p in preds], dtype=float)
probas = np.zeros(shape=(len(X), 2), dtype=float)
probas[np.arange(n_examples),decisions] = scores
probas[np.arange(n_examples),~decisions] = 1-scores
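# note: with two columns, ~decisions maps 0 -> -1 (column 1) and 1 -> -2 (column 0), i.e., it indexes the
# complementary class, which receives probability 1-score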
return probas
# def methods(pre_trained_classifier):
# yield 'CC', CC(pre_trained_classifier, fit_classifier=False)
USE_LOGISTIC_REGRESSION = True
if USE_LOGISTIC_REGRESSION:
new_classifier = lambda:LR()
to_fit = True
else:
pretrained = HFTextClassifier()
new_classifier = lambda:pretrained
to_fit = False
def methods():
yield 'CC', CC(new_classifier(), fit_classifier=to_fit)
yield 'ACC', ACC(new_classifier(), fit_classifier=to_fit)
yield 'HDy', DistributionMatchingY(new_classifier(), fit_classifier=to_fit)
yield 'HDx', HDxDensify()
yield 'KMM', QUnfoldWrapperDensify(KMM())
yield 'SLD', EMQ(new_classifier(), fit_classifier=to_fit)
yield 'KDEy', KDEyML(new_classifier(), fit_classifier=to_fit)
train, tests, test_init = prepare_labelled_collections()
if USE_LOGISTIC_REGRESSION:
# vectorize text for logistic regression
vectorizer = TfidfVectorizer(min_df=5, sublinear_tf=True)
Xtr = vectorizer.fit_transform(train.X)
train = LabelledCollection(Xtr, train.labels, train.classes_)
for i in range(len(tests)):
Xte = vectorizer.transform(tests[i].X)
tests[i] = LabelledCollection(Xte, tests[i].labels, train.classes_)
results = defaultdict(list)
for test_i, test_init_i in zip(tests, test_init):
results['true-prev'].append(test_i.prevalence())
results['date-start'].append(test_init_i[0])
for q_name, quant in methods():
quant.fit(*train.Xy)
for test_i, test_init_i in tqdm(zip(tests, test_init), desc=f'{q_name} predicting', total=len(tests)):
pred_i = quant.predict(test_i.X)
results[q_name].append(pred_i)
suffix = '_lr' if USE_LOGISTIC_REGRESSION else '_transformer'
plot_prevalences(results, savepath=f'./plots_ieee/over_time{suffix}.pdf')

View File

@ -0,0 +1,73 @@
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
palette = itertools.cycle(sns.color_palette())
def setframe():
fig.spines['top'].set_visible(False)
fig.spines['left'].set_visible(False)
fig.get_yaxis().set_ticks([])
fig.spines['right'].set_visible(False)
# fig.axis('off')
nbins = 50
figsize = (5, 2)
ymax = 0.2
negatives = np.random.normal(loc = 0.3, scale=0.2, size=20000)
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])
plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=negatives, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
plt.title('Negative distribution')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
# fig.get_figure().savefig('plots_cacm/negatives.pdf')
# plt.clf()
# -------------------------------------------------------------
positives1 = np.random.normal(loc = 0.75, scale=0.06, size=20000)
positives2 = np.random.normal(loc = 0.65, scale=0.1, size=1)
positives = np.concatenate([positives1, positives2])
np.random.shuffle(positives)
positives = np.asarray([x for x in positives if 0 <= x <= 1])
# plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=positives, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
plt.title('')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
fig.get_figure().savefig('plots_cacm/training.pdf')
# -------------------------------------------------------------
prev = 0.2
test = np.concatenate([
negatives[:int(len(negatives)*(1-prev))],
positives[:int(len(positives)*(prev))],
])
plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=test, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
plt.title('')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
fig.get_figure().savefig('plots_cacm/test.pdf')

View File

@ -0,0 +1,86 @@
from copy import deepcopy
import numpy as np
from sklearn.linear_model import LogisticRegression
import quapy as qp
from method.non_aggregative import DMx
from protocol import APP
from quapy.method.aggregative import CC, ACC, DMy
from sklearn.svm import LinearSVC
qp.environ['SAMPLE_SIZE'] = 100
DATASETS = qp.datasets.UCI_DATASETS[10:]
def fit_eval_task(args):
model_name, model, train, test = args
with qp.util.temp_seed(0):
model = deepcopy(model)
model.fit(train)
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
return model_name, true_prev, estim_prev
def gen_data():
def base_classifier():
return LogisticRegression()
#return LinearSVC(class_weight='balanced')
def models():
yield 'CC', CC(base_classifier())
yield 'ACC', ACC(base_classifier())
yield 'HDy', DMy(base_classifier(), val_split=10, nbins=10, n_jobs=-1)
yield 'HDx', DMx(nbins=10, n_jobs=-1)
# train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10).train_test
method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
for dataset_name in DATASETS:
train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test
print(dataset_name, train.X.shape)
outs = qp.util.parallel(
fit_eval_task,
((method_name, model, train, test) for method_name, model in models()),
seed=0,
n_jobs=-1
)
for method_name, true_prev, estim_prev in outs:
method_names.append(method_name)
true_prevs.append(true_prev)
estim_prevs.append(estim_prev)
tr_prevs.append(train.prevalence())
return method_names, true_prevs, estim_prevs, tr_prevs
method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('../quick_experiment/pickled_plot_data.pkl', gen_data)
def remove_dataset(dataset_order, num_methods=4):
sel_names, sel_true, sel_estim, sel_tr = [],[],[],[]
for i, (name, true, estim, tr) in enumerate(zip(method_names, true_prevs, estim_prevs, tr_prevs)):
dataset_pos = i//num_methods
if dataset_pos not in dataset_order:
sel_names.append(name)
sel_true.append(true)
sel_estim.append(estim)
sel_tr.append(tr)
return np.asarray(sel_names), np.asarray(sel_true), np.asarray(sel_estim), np.asarray(sel_tr)
print(DATASETS)
selected = 10
for i in [selected]:
print(i, DATASETS[i])
all_ = set(range(len(DATASETS)))
remove_index = sorted(all_ - {i})
sel_names, sel_true, sel_estim, sel_tr = remove_dataset(dataset_order=remove_index, num_methods=4)
p=sel_tr[0][1]
sel_names = ['CC$_{'+str(p)+'}$' if x=='CC' else x for x in sel_names]
# qp.plot.binary_diagonal(sel_names, sel_true, sel_estim, train_prev=sel_tr[0], show_std=False, savepath=f'./plots/bin_diag_{i}.png')
qp.plot.error_by_drift(sel_names, sel_true, sel_estim, sel_tr, n_bins=10, savepath=f'./plots/err_drift_{i}.png', show_std=True, show_density=False, title="")
# qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
# qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=3, savepath='./plots/bin_bias_bin.png')


@ -0,0 +1,62 @@
import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
from data import LabelledCollection
scale = 100
import quapy as qp
negatives = np.random.normal(loc = 0.2, scale=0.2, size=20000)
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])
positives = np.random.normal(loc = 0.75, scale=0.05, size=20000)
positives = np.asarray([x for x in positives if 0 <= x <= 1])
prev = 0.1
test = np.concatenate([
negatives[:int(len(negatives)*(1-prev))],
positives[:int(len(positives)*(prev))],
])
nbins = 30
plt.rcParams.update({'font.size': 7})
fig = plt.figure()
positions = np.asarray([2,1,0])
colors = ['r', 'g', 'b']
ax = fig.add_subplot(111, projection='3d')
ax.set_box_aspect((3, 1, 0.8))
for post, c, z in zip([test, positives, negatives], colors, positions):
hist, bins = np.histogram(post, bins=np.linspace(0,1, nbins+1), density=True)
xs = (bins[:-1] + bins[1:])/2
ax.bar(xs, hist, width=1 / nbins, zs=z, zdir='y', color=c, ec=c, alpha=0.6)
ax.yaxis.set_ticks(positions)
ax.yaxis.set_ticklabels([' '*20+'Test distribution', ' '*20+'Positive distribution', ' '*20+'Negative distribution'])
# ax.xaxis.set_ticks([])
# ax.xaxis.set_ticklabels([], minor=True)
ax.zaxis.set_ticks([])
ax.zaxis.set_ticklabels([], minor=True)
#plt.figure(figsize=(10,6))
#plt.show()
plt.savefig('./histograms3d_CACM2023.pdf')


@ -0,0 +1,63 @@
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import GridSearchCV
import quapy as qp
from data import LabelledCollection
from method.non_aggregative import DMx
from protocol import APP
from quapy.method.aggregative import CC, DMy, ACC, EMQ
from sklearn.svm import LinearSVC
import numpy as np
from tqdm import tqdm
qp.environ['SAMPLE_SIZE'] = 500
def cls():
return LogisticRegressionCV(n_jobs=-1,Cs=10)
# return LogisticRegression(C=.1)
def gen_methods():
yield CC(cls()), r'CC$_{10' + r'\%}$'
yield ACC(cls()), 'ACC'
yield DMy(cls(), val_split=10, nbins=10, n_jobs=-1), 'HDy'
yield DMx(nbins=10, n_jobs=-1), 'HDx'
yield EMQ(cls()), 'SLD'
# yield EMQ(cls(), calib='vs'), 'SLD-VS'
def gen_data():
train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
method_data = []
training_prevalence = 0.1
training_size = 5000
# the problem is binary, so it suffices to specify the negative prevalence; the positive one is determined by it
train_sample = train.sampling(training_size, 1-training_prevalence, random_state=0)
# train_sample = train
for model, method_name in tqdm(gen_methods(), total=5):  # gen_methods yields five quantifiers
with qp.util.temp_seed(1):
if method_name == 'HDx':
X, y = train_sample.Xy
svd = TruncatedSVD(n_components=5, random_state=0)
Xred = svd.fit_transform(X)
train_sample_dense = LabelledCollection(Xred, y)
X, y = test.Xy
test_dense = LabelledCollection(svd.transform(X), y)
model.fit(*train_sample_dense.Xy)
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test_dense, repeats=100, random_state=0))
else:
model.fit(*train_sample.Xy)
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))
return zip(*method_data)
method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
# qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_ieee/bin_diag_4methods.pdf')
qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_ieee/err_drift_4methods.pdf', title='', show_density=False, show_std=True)


@ -0,0 +1,40 @@
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import GridSearchCV
import quapy as qp
from protocol import APP
from quapy.method.aggregative import CC
from sklearn.svm import LinearSVC
import numpy as np
from tqdm import tqdm
qp.environ['SAMPLE_SIZE'] = 500
def gen_data():
train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
method_data = []
for training_prevalence in tqdm(np.linspace(0.1, 0.9, 9), total=9):
training_size = 5000
# the problem is binary, so it suffices to specify the negative prevalence; the positive one is determined by it
train_sample = train.sampling(training_size, 1-training_prevalence)
# cls = GridSearchCV(LinearSVC(), param_grid={'C': np.logspace(-2,2,5), 'class_weight':[None, 'balanced']}, n_jobs=-1)
# cls = GridSearchCV(LogisticRegression(), param_grid={'C': np.logspace(-2, 2, 5), 'class_weight': [None, 'balanced']}, n_jobs=-1)
# cls.fit(*train_sample.Xy)
model = CC(LogisticRegressionCV(n_jobs=-1,Cs=10))
model.fit(train_sample)
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
method_name = 'CC$_{' + f'{int(100*training_prevalence)}' + r'\%}$'
method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))
return zip(*method_data)
method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_cacm/bin_diag_cc.pdf')
# qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_cacm/err_drift_cc.pdf', title='', show_density=False)


@ -0,0 +1,126 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
def plot_kde_background(ax, data, cmap="Blues", alpha=0.35, gridsize=200):
"""
data: array Nx2
"""
# KDE
kde = gaussian_kde(data.T)
# Grid for evaluation
x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
X, Y = np.meshgrid(
np.linspace(x_min, x_max, gridsize),
np.linspace(y_min, y_max, gridsize)
)
Z = kde(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)
# Draw background density
ax.contourf(X, Y, Z, levels=30, cmap=cmap, alpha=alpha)
# ======================================================
# Define 3 Gaussian sources in 2D
# ======================================================
# Means
mu1 = np.array([0, 0]) # negative
mu2 = np.array([3, 0]) # positive
mu3 = np.array([0, 3]) # positive
# Covariances
Sigma = np.array([[1, 0.2],
[0.2, 1]])
def sample_gaussian(mu, Sigma, n):
return np.random.multivariate_normal(mu, Sigma, n)
# ======================================================
# Generate datasets for the 4 scenarios
# ======================================================
density = 20
# ---------- Scenario 1: Baseline ----------
G1_1 = sample_gaussian(mu1, Sigma, 100*density)
G2_1 = sample_gaussian(mu2, Sigma, 100*density)
G3_1 = sample_gaussian(mu3, Sigma, 100*density)
# ---------- Scenario 2: Prior Probability Shift ----------
G1_2 = sample_gaussian(mu1, Sigma, 300*density)
G2_2 = sample_gaussian(mu2, Sigma, 50*density)
G3_2 = sample_gaussian(mu3, Sigma, 50*density)
# ---------- Scenario 3: Covariate Shift ----------
# same class proportions but G3 moves (X-shift)
mu3_shift = mu3 + np.array([1.5, 0])
G1_3 = sample_gaussian(mu1, Sigma, 100*density)
G2_3 = sample_gaussian(mu2, Sigma, 100*density)
G3_3 = sample_gaussian(mu3_shift, Sigma, 100*density) # shifted covariates
# ---------- Scenario 4: Concept Shift ----------
# same data as Scenario 1, but G3 becomes negative
G1_4 = G1_1
G2_4 = G2_1
G3_4 = G3_1 # but will be colored as negative
# ======================================================
# Plotting function for each scenario
# ======================================================
def plot_scenario(ax, G1, G2, G3, title, G3_negative=False):
# plot_kde_background(ax, G1, cmap="Reds", alpha=0.75)
# plot_kde_background(ax, G2, cmap="Blues", alpha=0.75)
# plot_kde_background(ax, G3, cmap="Greens", alpha=0.75)
ax.scatter(G1[:, 0], G1[:, 1], s=12, color='red', alpha=0.1, label='Negative ($\ominus$)')
ax.scatter(G2[:, 0], G2[:, 1], s=12, color='blue', alpha=0.1, label='Positive ($\oplus$)')
if G3_negative:
ax.scatter(G3[:, 0], G3[:, 1], s=12, color='red', alpha=0.1) #, label='Negative ($\ominus$)')
else:
ax.scatter(G3[:, 0], G3[:, 1], s=12, color='blue', alpha=0.1) #, label='Positive ($\oplus$)')
ax.set_title(title)
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")
ax.set_xticks([])
ax.set_yticks([])
ax.grid(alpha=0.3)
# ======================================================
# Generate 2×2 grid of subplots
# ======================================================
fig, axes = plt.subplots(2, 2, figsize=(9, 9))
plot_scenario(axes[0, 0], G1_1, G2_1, G3_1,
"Training data")
plot_scenario(axes[0, 1], G1_2, G2_2, G3_2,
"Prior Probability Shift")
plot_scenario(axes[1, 0], G1_3, G2_3, G3_3,
"Covariate Shift",
G3_negative=False)
plot_scenario(axes[1, 1], G1_4, G2_4, G3_4,
"Concept Shift",
G3_negative=True)
# One global legend
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=3, fontsize=12)
plt.tight_layout(rect=[0, 0, 1, 0.95])
# plt.show()
plt.savefig('dataset_shift_types.pdf')


@ -0,0 +1,97 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from quapy.functional import uniform_prevalence_sampling
# Vertices of a regular tetrahedron
v1 = np.array([0, 0, 0])
v2 = np.array([1, 0, 0])
v3 = np.array([0.5, np.sqrt(3)/2, 0])
v4 = np.array([0.5, np.sqrt(3)/6, np.sqrt(6)/3])
vertices = np.array([v1, v2, v3, v4])
# Function to map (p1,p2,p3,p4) to 3D coordinates
def prob_to_xyz(p):
return p[0]*v1 + p[1]*v2 + p[2]*v3 + p[3]*v4
# --- Example: 5 random distributions inside the simplex
rand_probs = uniform_prevalence_sampling(n_classes=4, size=3000)
points_xyz = np.array([prob_to_xyz(p) for p in rand_probs])
# --- Plotting
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
# Draw tetrahedron faces
faces = [
[v1, v2, v3],
[v1, v2, v4],
[v1, v3, v4],
[v2, v3, v4]
]
poly = Poly3DCollection(faces, alpha=0.15, edgecolor='k', facecolor=None)
# poly = Poly3DCollection(faces, alpha=0.15, edgecolor='k', facecolor=None)
ax.add_collection3d(poly)
edges = [
[v1, v2],
[v1, v3],
[v1, v4],
[v2, v3],
[v2, v4],
[v3, v4]
]
for edge in edges:
xs, ys, zs = zip(*edge)
ax.plot(xs, ys, zs, color='black', linewidth=1)
# Draw vertices
# ax.scatter(vertices[:,0], vertices[:,1], vertices[:,2], s=60, color='red')
# Labels
offset = 0.08
labels = ["$y_1$", "$y_2$", "$y_3$", "$y_4$"]
for i, v in enumerate(vertices):
direction = v / np.linalg.norm(v)
label_pos = v + offset * direction
ax.text(label_pos[0], label_pos[1], label_pos[2], labels[i], fontsize=14, color='black')
# Plot random points
ax.scatter(points_xyz[:,0], points_xyz[:,1], points_xyz[:,2], s=10, c='blue', alpha=0.2)
# Axes formatting
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
ax.set_title("4-Class Probability Simplex (Tetrahedron)")
# ax.view_init(elev=65, azim=20)
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])
ax.xaxis.set_ticks_position('none')  # prevent tick marks from being drawn
ax.yaxis.set_ticks_position('none')
ax.zaxis.set_ticks_position('none')
ax.xaxis.pane.set_visible(False)
ax.yaxis.pane.set_visible(False)
ax.zaxis.pane.set_visible(False)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_zlabel('')
ax.grid(False)
plt.tight_layout()
# plt.show()
plt.savefig('plots_ieee/tetrahedron.pdf')

KDEyAitchison/commons.py (new file, 56 lines)

@ -0,0 +1,56 @@
import numpy as np
import pandas as pd
from quapy.method.aggregative import EMQ, KDEyML, PACC
from sklearn.linear_model import LogisticRegression
METHODS = ['PACC',
'EMQ',
'KDEy-ML',
'KDEy-MLA'
]
# common hyperparameters
hyper_LR = {
'classifier__C': np.logspace(-3, 3, 7),
'classifier__class_weight': ['balanced', None]
}
hyper_kde = {
'bandwidth': np.linspace(0.001, 0.5, 100)
}
hyper_kde_aitchison = {
'bandwidth': np.linspace(0.01, 2, 100)
}
# instantiates a new quantifier from its string name
def new_method(method, **lr_kwargs):
lr = LogisticRegression(**lr_kwargs)
if method == 'KDEy-ML':
param_grid = {**hyper_kde, **hyper_LR}
quantifier = KDEyML(lr, kernel='gaussian')
elif method == 'KDEy-MLA':
param_grid = {**hyper_kde_aitchison, **hyper_LR}
quantifier = KDEyML(lr, kernel='aitchison')
elif method == 'EMQ':
param_grid = hyper_LR
quantifier = EMQ(lr)
elif method == 'PACC':
param_grid = hyper_LR
quantifier = PACC(lr)
else:
raise NotImplementedError('unknown method', method)
return param_grid, quantifier
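# a hedged usage sketch (illustrative): the returned pair plugs directly into model selection, e.g.
#   param_grid, quantifier = new_method('KDEy-MLA', max_iter=3000)
#   # param_grid combines the Aitchison-kernel bandwidth grid with the LR hyperparameters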
def show_results(result_path):
df = pd.read_csv(result_path+'.csv', sep='\t')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
print(pv)


@ -0,0 +1,38 @@
import pickle
import os
import sys
import pandas as pd
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from commons import METHODS, new_method, show_results
from new_table import LatexTable
SEED = 1
if __name__ == '__main__':
print(qp.datasets.UCI_MULTICLASS_DATASETS)
for optim in ['mae', 'mrae']:
table = LatexTable()
result_dir = f'results/ucimulti/{optim}'
for method in METHODS:
print()
global_result_path = f'{result_dir}/{method}'
print(f'Method\tDataset\tMAE\tMRAE\tKLD')
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
# print(dataset)
local_result_path = global_result_path + '_' + dataset
if os.path.exists(local_result_path + '.dataframe'):
report = pd.read_csv(local_result_path+'.dataframe')
print(f'{method}\t{dataset}\t{report[optim].mean():.5f}')
table.add(benchmark=dataset, method=method, v=report[optim].values)
else:
print(dataset, 'not found for method', method)
table.latexPDF(f'./tables/{optim}.pdf', landscape=False)


@ -0,0 +1,94 @@
import pickle
import os
import sys
import pandas as pd
import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from commons import METHODS, new_method, show_results
SEED = 1
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 500
qp.environ['N_JOBS'] = -1
n_bags_val = 250
n_bags_test = 1000
for optim in ['mae', 'mrae']:
result_dir = f'results/ucimulti/{optim}'
os.makedirs(result_dir, exist_ok=True)
for method in METHODS:
print('Init method', method)
global_result_path = f'{result_dir}/{method}'
# show_results(global_result_path)
# sys.exit(0)
if not os.path.exists(global_result_path + '.csv'):
with open(global_result_path + '.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
with open(global_result_path + '.csv', 'at') as csv:
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
print('init', dataset)
local_result_path = global_result_path + '_' + dataset
if os.path.exists(local_result_path + '.dataframe'):
print(f'result file {local_result_path}.dataframe already exist; skipping')
report = pd.read_csv(local_result_path+'.dataframe')
print(report["mae"].mean())
# data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
# csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
continue
with qp.util.temp_seed(SEED):
param_grid, quantifier = new_method(method, max_iter=3000)
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
# model selection
train, test = data.train_test
train, val = train.split_stratified(random_state=SEED)
protocol = UPP(val, repeats=n_bags_val)
modsel = GridSearchQ(
quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=True, error=optim
)
try:
modsel.fit(*train.Xy)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
pickle.dump(
(modsel.best_params_, modsel.best_score_,),
open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
quantifier = modsel.best_model()
except Exception:
print('something went wrong... trying to fit the default model')
quantifier.fit(*train.Xy)
protocol = UPP(test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(
quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
)
report.to_csv(f'{local_result_path}.dataframe')
print(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
csv.flush()
show_results(global_result_path)


@ -604,7 +604,10 @@ estim_prevalence = model.predict(dataset.test.X)
_(New in v0.2.0!)_ Some quantification methods go beyond providing a single point estimate of class prevalence values and also produce confidence regions, which characterize the uncertainty around the point estimate. In QuaPy, two such methods are currently implemented:
* Aggregative Bootstrap: The Aggregative Bootstrap method extends any aggregative quantifier by generating confidence regions for class prevalence estimates through bootstrapping. Key features of this method include:
* Aggregative Bootstrap: The Aggregative Bootstrap method extends any aggregative quantifier by generating confidence regions for class prevalence estimates through bootstrapping. The method is described in the paper [Moreo, A., Salvati, N.
An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.
Learning to Quantify: Methods and Applications (LQ 2025), co-located with ECML-PKDD 2025,
pp. 12-33, Porto (Portugal)](https://lq-2025.github.io/proceedings/CompleteVolume.pdf). Key features of this method include (a usage sketch follows this list):
* Optimized Computation: The bootstrap is applied to pre-classified instances, significantly speeding up training and inference.
During training, bootstrap repetitions are performed only after training the classifier once. These repetitions are used to train multiple aggregation functions.
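A minimal usage sketch (hedged: the class name AggregativeBootstrap, its import path quapy.method.confidence, and the constructor arguments shown are assumptions made for illustration; only fit, predict_conf and the coverage utility reflect the interface documented in this changeset):

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC
from quapy.method.confidence import AggregativeBootstrap  # hypothetical import path

train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
quantifier = AggregativeBootstrap(PACC(LogisticRegression()), confidence_level=0.95)  # illustrative arguments
quantifier.fit(*train.Xy)
point_estimate, conf_region = quantifier.predict_conf(test.X)
print(point_estimate)                            # estimated class prevalence values
print(conf_region.coverage(test.prevalence()))   # 1.0 if the true prevalence falls within the region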


@ -60,6 +60,14 @@ quapy.method.composable module
:undoc-members:
:show-inheritance:
quapy.method.confidence module
------------------------------
.. automodule:: quapy.method.confidence
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------


@ -36,7 +36,7 @@ with qp.util.temp_seed(0):
true_prev = shifted_test.prevalence()
# by calling "quantify_conf", we obtain the point estimate and the confidence intervals around it
pred_prev, conf_intervals = pacc.quantify_conf(shifted_test.X)
pred_prev, conf_intervals = pacc.predict_conf(shifted_test.X)
# conf_intervals is an instance of ConfidenceRegionABC, which provides some useful utilities like:
# - coverage: a function which computes the fraction of true values that belong to the confidence region
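# a hedged illustration (names as in the snippet above): the region returned by predict_conf can be queried directly, e.g.
# covered = conf_intervals.coverage(true_prev)   # 1.0 if the true prevalence falls inside the region, 0.0 otherwise
# print(f'point estimate={pred_prev}, true={true_prev}, covered={covered}')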


@ -0,0 +1,60 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectKBest, chi2
import quapy as qp
from quapy.method.non_aggregative import ReadMe
import quapy.functional as F
from sklearn.pipeline import Pipeline
"""
This example showcases how to use the non-aggregative method ReadMe proposed by Hopkins and King.
This method is for text analysis, so let us first instantiate a dataset for sentiment quantification (we
use IMDb for this example). The method is quite computationally expensive, so we will restrict the training
set to 1000 documents only.
"""
reviews = qp.datasets.fetch_reviews('imdb').reduce(n_train=1000, random_state=0)
"""
We need to convert text to bag-of-words representations. Actually, ReadMe requires the representations to be
binary (i.e., storing a 1 whenever a document contains a certain word, and a 0 otherwise), so we will not use
TFIDF weighting. We will also retain the top 1000 most important features according to chi2.
"""
encode_0_1 = Pipeline([
('0_1_terms', CountVectorizer(min_df=5, binary=True)),
('feat_sel', SelectKBest(chi2, k=1000))
])
train, test = qp.data.preprocessing.instance_transformation(reviews, encode_0_1, inplace=True).train_test
"""
We now instantiate ReadMe, with the prob_model='full' (default behaviour, implementing the Hopkins and King original
idea). This method consists of estimating Q(Y) by solving:
Q(X) = \sum_i Q(X|Y=i) Q(Y=i)
without resorting to estimating the posteriors Q(Y=i|X), by solving a linear least-squares problem.
However, since Q(X) and Q(X|Y=i) are matrices of shape (2^K, 1) and (2^K, n), with K the number of features
and n the number of classes, their calculation becomes intractable. ReadMe instead performs bagging (i.e., it
samples small sets of features and averages the results) thus reducing K to a few terms. In our example we
set K (bagging_range) to 20, and the number of bagging_trials to 100.
ReadMe also computes confidence intervals via bootstrap. We set the number of bootstrap trials to 100.
"""
readme = ReadMe(prob_model='full', bootstrap_trials=100, bagging_trials=100, bagging_range=20, random_state=0, verbose=True)
readme.fit(*train.Xy) # <- there is actually nothing happening here (only bootstrap resampling); the method is "lazy"
# and postpones most of the calculations to the test phase.
# since the method is slow, we will only test 3 cases with different imbalances
few_negatives = [0.25, 0.75]
balanced = [0.5, 0.5]
few_positives = [0.75, 0.25]
for test_prev in [few_negatives, balanced, few_positives]:
sample = reviews.test.sampling(500, *test_prev, random_state=0) # draw sets of 500 documents with desired prevs
prev_estim, conf = readme.predict_conf(sample.X)
err = qp.error.mae(sample.prevalence(), prev_estim)
print(f'true-prevalence={F.strprev(sample.prevalence())},\n'
f'predicted-prevalence={F.strprev(prev_estim)}, with confidence intervals {conf},\n'
f'MAE={err:.4f}')


@ -0,0 +1,254 @@
from scipy.sparse import csc_matrix, csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
import numpy as np
from joblib import Parallel, delayed
import sklearn
import math
from scipy.stats import t
class ContTable:
def __init__(self, tp=0, tn=0, fp=0, fn=0):
self.tp=tp
self.tn=tn
self.fp=fp
self.fn=fn
def get_d(self): return self.tp + self.tn + self.fp + self.fn
def get_c(self): return self.tp + self.fn
def get_not_c(self): return self.tn + self.fp
def get_f(self): return self.tp + self.fp
def get_not_f(self): return self.tn + self.fn
def p_c(self): return (1.0*self.get_c())/self.get_d()
def p_not_c(self): return 1.0-self.p_c()
def p_f(self): return (1.0*self.get_f())/self.get_d()
def p_not_f(self): return 1.0-self.p_f()
def p_tp(self): return (1.0*self.tp) / self.get_d()
def p_tn(self): return (1.0*self.tn) / self.get_d()
def p_fp(self): return (1.0*self.fp) / self.get_d()
def p_fn(self): return (1.0*self.fn) / self.get_d()
def tpr(self):
c = 1.0*self.get_c()
return self.tp / c if c > 0.0 else 0.0
def fpr(self):
_c = 1.0*self.get_not_c()
return self.fp / _c if _c > 0.0 else 0.0
def __ig_factor(p_tc, p_t, p_c):
den = p_t * p_c
if den != 0.0 and p_tc != 0:
return p_tc * math.log(p_tc / den, 2)
else:
return 0.0
def information_gain(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \
__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) +\
__ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \
__ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
def squared_information_gain(cell):
return information_gain(cell)**2
def posneg_information_gain(cell):
ig = information_gain(cell)
if cell.tpr() < cell.fpr():
return -ig
else:
return ig
def pos_information_gain(cell):
if cell.tpr() < cell.fpr():
return 0
else:
return information_gain(cell)
def pointwise_mutual_information(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
def gss(cell):
return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn()
def chi_square(cell):
den = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
if den==0.0: return 0.0
num = gss(cell)**2
return num / den
def conf_interval(xt, n):
if n>30:
z2 = 3.84145882069 # norm.ppf(0.5+0.95/2.0)**2
else:
z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2
p = (xt + 0.5 * z2) / (n + z2)
amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2))
return p, amplitude
def strength(minPosRelFreq, minPos, maxNeg):
if minPos > maxNeg:
return math.log(2.0 * minPosRelFreq, 2.0)
else:
return 0.0
# set cancel_features=True to allow some features to be weighted as 0 (as in the original article);
# however, for some extremely imbalanced datasets this caused all documents to be weighted as 0
def conf_weight(cell, cancel_features=False):
c = cell.get_c()
not_c = cell.get_not_c()
tp = cell.tp
fp = cell.fp
pos_p, pos_amp = conf_interval(tp, c)
neg_p, neg_amp = conf_interval(fp, not_c)
min_pos = pos_p-pos_amp
max_neg = neg_p+neg_amp
den = (min_pos + max_neg)
minpos_relfreq = min_pos / (den if den != 0 else 1)
str_tplus = strength(minpos_relfreq, min_pos, max_neg)
if str_tplus == 0 and not cancel_features:
return 1e-20
return str_tplus
def get_tsr_matrix(cell_matrix, tsr_score_function):
nC = len(cell_matrix)
nF = len(cell_matrix[0])
tsr_matrix = [[tsr_score_function(cell_matrix[c,f]) for f in range(nF)] for c in range(nC)]
return np.array(tsr_matrix)
def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
tp_ = len(positive_document_indexes & feature_document_indexes)
fp_ = len(feature_document_indexes - positive_document_indexes)
fn_ = len(positive_document_indexes - feature_document_indexes)
tn_ = nD - (tp_ + fp_ + fn_)
return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_)
def category_tables(feature_sets, category_sets, c, nD, nF):
return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)]
def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
"""
Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c.
Efficiency O(nF x nC x log(S)) where S is the sparse factor
"""
nD, nF = coocurrence_matrix.shape
nD2, nC = label_matrix.shape
if nD != nD2:
raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
(coocurrence_matrix.shape,label_matrix.shape))
def nonzero_set(matrix, col):
return set(matrix[:, col].nonzero()[0])
if isinstance(coocurrence_matrix, csr_matrix):
coocurrence_matrix = csc_matrix(coocurrence_matrix)
feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)]
category_sets = [nonzero_set(label_matrix, c) for c in range(nC)]
cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(
delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC)
)
return np.array(cell_matrix)
class TSRweighting(BaseEstimator,TransformerMixin):
"""
Supervised Term Weighting function based on any Term Selection Reduction (TSR) function (e.g., information gain,
chi-square, etc.) or, more generally, on any function that could be computed on the 4-cell contingency table for
each category-feature pair.
The supervised_4cell_matrix is a `(n_classes, n_words)` matrix containing the 4-cell contingency tables
for each class-word pair, and can be pre-computed (e.g., during the feature selection phase) and passed as an
argument.
When `n_classes>1`, i.e., in multiclass scenarios, a global_policy is used in order to determine a
single feature-score which informs about its relevance. Accepted policies include "max" (takes the max score
across categories), "ave" and "wave" (take the average, or weighted average, across all categories -- weights
correspond to the class prevalence), and "sum" (which sums all category scores).
"""
def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
if global_policy not in ['max', 'ave', 'wave', 'sum']: raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
self.tsr_function = tsr_function
self.global_policy = global_policy
self.supervised_4cell_matrix = supervised_4cell_matrix
self.sublinear_tf = sublinear_tf
self.norm = norm
self.min_df = min_df
self.n_jobs = n_jobs
def fit(self, X, y):
self.count_vectorizer = CountVectorizer(min_df=self.min_df)
X = self.count_vectorizer.fit_transform(X)
self.tf_vectorizer = TfidfTransformer(
norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf
).fit(X)
if len(y.shape) == 1:
y = np.expand_dims(y, axis=1)
nD, nC = y.shape
nF = len(self.tf_vectorizer.get_feature_names_out())
if self.supervised_4cell_matrix is None:
self.supervised_4cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
else:
if self.supervised_4cell_matrix.shape != (nC, nF):
raise ValueError("Shape of supervised information matrix is inconsistent with X and y")
tsr_matrix = get_tsr_matrix(self.supervised_4cell_matrix, self.tsr_function)
if self.global_policy == 'ave':
self.global_tsr_vector = np.average(tsr_matrix, axis=0)
elif self.global_policy == 'wave':
category_prevalences = [sum(y[:,c])*1.0/nD for c in range(nC)]
self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
elif self.global_policy == 'sum':
self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
elif self.global_policy == 'max':
self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
return self
def fit_transform(self, X, y):
return self.fit(X,y).transform(X)
def transform(self, X):
if not hasattr(self, 'global_tsr_vector'): raise NameError('TSRweighting: transform method called before fit.')
X = self.count_vectorizer.transform(X)
tf_X = self.tf_vectorizer.transform(X).toarray()
weighted_X = np.multiply(tf_X, self.global_tsr_vector)
if self.norm is not None and self.norm!='none':
weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
return csr_matrix(weighted_X)
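# A hedged usage sketch (illustrative only): TSRweighting expects raw text documents and a label
# matrix of shape (n_docs, n_classes); min_df=1 is used here only because the toy corpus is tiny.
#   import numpy as np
#   docs = ['good great film', 'great plot', 'bad awful film', 'awful acting']
#   y = np.asarray([[1, 0], [1, 0], [0, 1], [0, 1]])   # one column per class
#   tsr = TSRweighting(tsr_function=information_gain, global_policy='max', min_df=1)
#   Xw = tsr.fit_transform(docs, y)                     # sparse matrix of TSR-weighted tf scores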


@ -0,0 +1,208 @@
from scipy.sparse import issparse
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import quapy as qp
from data import LabelledCollection
import numpy as np
from experimental_non_aggregative.custom_vectorizers import *
from method._kdey import KDEBase
from protocol import APP
from quapy.method.aggregative import HDy, DistributionMatchingY
from quapy.method.base import BaseQuantifier
from scipy import optimize
import pandas as pd
import quapy.functional as F
# TODO: explore the bernoulli (term presence/absence) variant
# TODO: explore the multinomial (term frequency) variant
# TODO: explore the multinomial + length normalization variant
# TODO: consolidate the TSR variant (e.g., using information gain);
# - works better with the idf?
# - works better with length normalization?
# - etc
class DxS(BaseQuantifier):
def __init__(self, vectorizer=None, divergence='topsoe'):
self.vectorizer = vectorizer
self.divergence = divergence
# def __as_distribution(self, instances):
# return np.asarray(instances.sum(axis=0) / instances.sum()).flatten()
def __as_distribution(self, instances):
dist = instances.mean(axis=0)
return np.asarray(dist).flatten()
def fit(self, text_instances, labels):
classes = np.unique(labels)
if self.vectorizer is not None:
text_instances = self.vectorizer.fit_transform(text_instances, y=labels)
distributions = []
for class_i in classes:
distributions.append(self.__as_distribution(text_instances[labels == class_i]))
self.validation_distribution = np.asarray(distributions)
return self
def predict(self, text_instances):
if self.vectorizer is not None:
text_instances = self.vectorizer.transform(text_instances)
test_distribution = self.__as_distribution(text_instances)
divergence = qp.functional.get_divergence(self.divergence)
n_classes, n_feats = self.validation_distribution.shape
def match(prev):
prev = np.expand_dims(prev, axis=0)
mixture_distribution = (prev @ self.validation_distribution).flatten()
return divergence(test_distribution, mixture_distribution)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
class KDExML(BaseQuantifier, KDEBase):
def __init__(self, bandwidth=0.1, standardize=False):
self._check_bandwidth(bandwidth)
self.bandwidth = bandwidth
self.standardize = standardize
def fit(self, X, y):
classes = sorted(np.unique(y))
if self.standardize:
self.scaler = StandardScaler()
X = self.scaler.fit_transform(X)
if issparse(X):
X = X.toarray()
self.mix_densities = self.get_mixture_components(X, y, classes, self.bandwidth)
return self
def predict(self, X):
"""
Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood
of the data (i.e., that minimizes the negative log-likelihood)
:param X: instances in the sample
:return: a vector of class prevalence estimates
"""
epsilon = 1e-10
if issparse(X):
X = X.toarray()
n_classes = len(self.mix_densities)
if self.standardize:
X = self.scaler.transform(X)
test_densities = [self.pdf(kde_i, X) for kde_i in self.mix_densities]
def neg_loglikelihood(prev):
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
return -np.sum(test_loglikelihood)
return F.optim_minimize(neg_loglikelihood, n_classes)
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 250
qp.environ['N_JOBS'] = -1
min_df = 10
# dataset = 'imdb'
repeats = 10
error = 'mae'
div = 'topsoe'
# generates tuples (dataset, method, method_name)
# (the dataset is needed for methods that process the dataset differently)
def gen_methods():
for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
data = qp.datasets.fetch_reviews(dataset, tfidf=False)
# bernoulli_vectorizer = CountVectorizer(min_df=min_df, binary=True)
# dxs = DxS(divergence=div, vectorizer=bernoulli_vectorizer)
# yield data, dxs, 'DxS-Bernoulli'
#
# multinomial_vectorizer = CountVectorizer(min_df=min_df, binary=False)
# dxs = DxS(divergence=div, vectorizer=multinomial_vectorizer)
# yield data, dxs, 'DxS-multinomial'
#
# tf_vectorizer = TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)
# dxs = DxS(divergence=div, vectorizer=tf_vectorizer)
# yield data, dxs, 'DxS-TF'
#
# logtf_vectorizer = TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)
# dxs = DxS(divergence=div, vectorizer=logtf_vectorizer)
# yield data, dxs, 'DxS-logTF'
#
# tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)
# dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
# yield data, dxs, 'DxS-TFIDF'
#
# tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')
# dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
# yield data, dxs, 'DxS-TFIDF-l2'
tsr_vectorizer = TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')
dxs = DxS(divergence=div, vectorizer=tsr_vectorizer)
yield data, dxs, 'DxS-TFTSR-l2'
data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
kdex = KDExML()
reduction = TruncatedSVD(n_components=100, random_state=0)
red_data = qp.data.preprocessing.instance_transformation(data, transformer=reduction, inplace=False)
yield red_data, kdex, 'KDEx'
hdy = HDy(LogisticRegression())
yield data, hdy, 'HDy'
# dm = DistributionMatchingY(LogisticRegression(), divergence=div, nbins=5)
# yield data, dm, 'DM-5b'
#
# dm = DistributionMatchingY(LogisticRegression(), divergence=div, nbins=10)
# yield data, dm, 'DM-10b'
result_path = 'results.csv'
with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for data, quantifier, quant_name in gen_methods():
quantifier.fit(*data.training.Xy)
report = qp.evaluation.evaluation_report(quantifier, APP(data.test, repeats=repeats), error_metrics=['mae','mrae'], verbose=True)
means = report.mean(numeric_only=True)
csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
df = pd.read_csv(result_path, sep='\t')
# print(df)
pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
print(pv)


@ -13,7 +13,7 @@ from . import model_selection
from . import classification
import os
__version__ = '0.2.0'
__version__ = '0.2.1'
def _default_cls():


@ -33,7 +33,6 @@ class LabelledCollection:
else:
self.instances = np.asarray(instances)
self.labels = np.asarray(labels)
n_docs = len(self)
if classes is None:
self.classes_ = F.classes_from_labels(self.labels)
else:
@ -41,7 +40,13 @@ class LabelledCollection:
self.classes_.sort()
if len(set(self.labels).difference(set(classes))) > 0:
raise ValueError(f'labels ({set(self.labels)}) contain values not included in classes_ ({set(classes)})')
self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_}
self._index = None
@property
def index(self):
if not hasattr(self, '_index') or self._index is None:
self._index = {class_: np.arange(len(self))[self.labels == class_] for class_ in self.classes_}
return self._index
@classmethod
def load(cls, path: str, loader_func: callable, classes=None, **loader_kwargs):


@ -10,6 +10,37 @@ from quapy.util import map_parallel
from .base import LabelledCollection
def instance_transformation(dataset:Dataset, transformer, inplace=False):
"""
Transforms a :class:`quapy.data.base.Dataset` applying the `fit_transform` and `transform` functions
of a (sklearn's) transformer.
:param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are
lists of str
:param transformer: TransformerMixin implementing `fit_transform` and `transform` functions
:param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)
:return: a new :class:`quapy.data.base.Dataset` with transformed instances (if inplace=False) or a reference to the
current Dataset (if inplace=True) where the instances have been transformed
"""
training_transformed = transformer.fit_transform(*dataset.training.Xy)
test_transformed = transformer.transform(dataset.test.X)
orig_name = dataset.name
if inplace:
dataset.training = LabelledCollection(training_transformed, dataset.training.labels, dataset.classes_)
dataset.test = LabelledCollection(test_transformed, dataset.test.labels, dataset.classes_)
if hasattr(transformer, 'vocabulary_'):
dataset.vocabulary = transformer.vocabulary_
return dataset
else:
training = LabelledCollection(training_transformed, dataset.training.labels.copy(), dataset.classes_)
test = LabelledCollection(test_transformed, dataset.test.labels.copy(), dataset.classes_)
vocab = None
if hasattr(transformer, 'vocabulary_'):
vocab = transformer.vocabulary_
return Dataset(training, test, vocabulary=vocab, name=orig_name)
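# a hedged usage sketch: given a textual Dataset (e.g., quapy.datasets.fetch_reviews('imdb')),
#   from sklearn.feature_extraction.text import TfidfVectorizer
#   dataset = instance_transformation(dataset, TfidfVectorizer(min_df=5), inplace=False)
# returns a new Dataset whose instances are tf-idf matrices and whose vocabulary is that of the vectorizer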
def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
"""
Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of
@ -29,18 +60,7 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
__check_type(dataset.test.instances, np.ndarray, str)
vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
training_documents = vectorizer.fit_transform(dataset.training.instances)
test_documents = vectorizer.transform(dataset.test.instances)
if inplace:
dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.classes_)
dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.classes_)
dataset.vocabulary = vectorizer.vocabulary_
return dataset
else:
training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.classes_)
test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.classes_)
return Dataset(training, test, vectorizer.vocabulary_)
return instance_transformation(dataset, vectorizer, inplace)
def reduce_columns(dataset: Dataset, min_df=5, inplace=False):


@ -3,6 +3,7 @@
import numpy as np
from sklearn.metrics import f1_score
import quapy as qp
from functional import CLRtransformation
def from_name(err_name):
@ -128,6 +129,59 @@ def se(prevs_true, prevs_hat):
return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)
def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
"""
The squared ratio error is defined as
:math:`SRE(p,\\hat{p},p^{tr})=\\frac{1}{|\\mathcal{Y}|}\\sum_{i \\in \\mathcal{Y}}(w_i-\\hat{w}_i)^2`,
where
:math:`w_i=\\frac{p_i}{p^{tr}_i}=\\frac{Q(Y=i)}{P(Y=i)}`,
and :math:`\\hat{w}_i` is the estimate obtained by replacing the true test prior with an estimate, and
:math:`\\mathcal{Y}` are the classes of interest
:param prevs_true: array-like, true prevalence values
:param prevs_hat: array-like, estimated prevalence values
:param prevs_train: array-like, training prevalence values
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
training prevalence is expected to be >0 everywhere.
:return: the squared ratio error
"""
prevs_true = np.asarray(prevs_true)
prevs_hat = np.asarray(prevs_hat)
prevs_train = np.asarray(prevs_train)
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
if eps>0:
prevs_true = smooth(prevs_true, eps)
prevs_hat = smooth(prevs_hat, eps)
prevs_train = smooth(prevs_train, eps)
N = prevs_true.shape[-1]
w = prevs_true / prevs_train
w_hat = prevs_hat / prevs_train
return (1. / N) * ((w - w_hat) ** 2).sum(axis=-1)
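# a hedged worked example: with training prevalence p_tr=[.5,.5], true test prevalence p=[.2,.8]
# and estimate p_hat=[.3,.7], the ratios are w=[.4,1.6] and w_hat=[.6,1.4], hence
# sre = ((0.2)**2 + (0.2)**2) / 2 = 0.04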
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
"""
Returns the mean across all experiments. See :func:`sre`.
:param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
:param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
:param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,)
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
training prevalence is expected to be >0 everywhere.
:return: the squared ratio error
"""
return np.mean(squared_ratio_error(prevs_true, prevs_hat, prevs_train, eps))
def dist_aitchison(prevs_true, prevs_hat):
clr = CLRtransformation()
return np.linalg.norm(clr(prevs_true) - clr(prevs_hat), axis=-1)
def mean_dist_aitchison(prevs_true, prevs_hat):
return np.mean(dist_aitchison(prevs_true, prevs_hat))
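# a hedged illustration: the Aitchison distance is the Euclidean distance between CLR-transformed
# prevalence vectors; e.g., dist_aitchison([0.2, 0.8], [0.3, 0.7]) evaluates to approximately 0.38
# (up to the effect of the EPSILON smoothing applied inside the CLR transformation)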
def mkld(prevs_true, prevs_hat, eps=None):
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
sample pairs. The distributions are smoothed using the `eps` factor
@ -374,8 +428,8 @@ def __check_eps(eps=None):
CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, msre, mean_dist_aitchison}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, sre, dist_aitchison}
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
@ -387,6 +441,7 @@ ERROR_NAMES = \
f1_error = f1e
acc_error = acce
mean_absolute_error = mae
squared_ratio_error = sre
absolute_error = ae
mean_relative_absolute_error = mrae
relative_absolute_error = rae


@ -1,10 +1,15 @@
import warnings
from abc import ABC, abstractmethod
from collections import defaultdict
from functools import lru_cache
from typing import Literal, Union, Callable
from numpy.typing import ArrayLike
import scipy
import numpy as np
from scipy.special import softmax
import quapy as qp
# ------------------------------------------------------------------------------------------
@ -583,8 +588,8 @@ def solve_adjustment(
"""
Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of
`unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of
:math:`P(\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
estimate of :math:`P(\hat{Y}=y_i|Y=y_j)`.
:math:`P(\\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
estimate of :math:`P(\\hat{Y}=y_i|Y=y_j)`.
:param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate
of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j`
@ -649,3 +654,96 @@ def solve_adjustment(
raise ValueError(f'unknown {solver=}')
# ------------------------------------------------------------------------------------------
# Transformations from Compositional analysis
# ------------------------------------------------------------------------------------------
class CompositionalTransformation(ABC):
"""
Abstract class of transformations from compositional data.
Basically, callable functions with an "inverse" function.
"""
@abstractmethod
def __call__(self, X): ...
@abstractmethod
def inverse(self, Z): ...
EPSILON = 1e-6  # smoothing constant shared by the compositional transformations below
class CLRtransformation(CompositionalTransformation):
"""
Centered log-ratio (CLR), from compositional analysis
"""
def __call__(self, X):
"""
Applies the CLR function to X, thus mapping the instances, which are vectors of :math:`\\mathcal{R}^{n}` that
actually lie on the (n-1)-dimensional probability simplex, onto the unrestricted space :math:`\\mathcal{R}^{n}`.
Prevalence values are smoothed with the constant EPSILON before taking logarithms.
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
X = np.asarray(X)
X = qp.error.smooth(X, EPSILON)
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G)
def inverse(self, Z):
"""
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
:param Z: np.ndarray of (n_instances, n_dimensions) to be transformed
:return: np.ndarray of (n_instances, n_dimensions), the points mapped back onto the probability simplex
"""
return scipy.special.softmax(Z, axis=-1)
class ILRtransformation(CompositionalTransformation):
"""
Isometric log-ratio (ILR), from compositional analysis
"""
def __call__(self, X):
X = np.asarray(X)
X = qp.error.smooth(X, EPSILON)
k = X.shape[-1]
V = self.get_V(k) # (k-1, k)
logp = np.log(X)
return logp @ V.T
def inverse(self, Z):
Z = np.asarray(Z)
# get dimension
k_minus_1 = Z.shape[-1]
k = k_minus_1 + 1
V = self.get_V(k) # (k-1, k)
logp = Z @ V
p = np.exp(logp)
p = p / np.sum(p, axis=-1, keepdims=True)
return p
@lru_cache(maxsize=None)
def get_V(self, k):
def helmert_matrix(k):
"""
Returns the (k x k) Helmert matrix.
"""
H = np.zeros((k, k))
for i in range(1, k):
H[i, :i] = 1
H[i, i] = -(i)
H[i] = H[i] / np.sqrt(i * (i + 1))
# row 0 stays zeros; will be discarded
return H
def ilr_basis(k):
"""
Constructs an orthonormal ILR basis using the Helmert submatrix.
Output shape: (k-1, k)
"""
H = helmert_matrix(k)
V = H[1:, :] # remove first row of zeros
return V
return ilr_basis(k)


@ -29,6 +29,7 @@ AGGREGATIVE_METHODS = {
aggregative.KDEyHD,
# aggregative.OneVsAllAggregative,
confidence.BayesianCC,
confidence.PQ,
}
BINARY_METHODS = {
@ -40,6 +41,7 @@ BINARY_METHODS = {
aggregative.MAX,
aggregative.MS,
aggregative.MS2,
confidence.PQ,
}
MULTICLASS_METHODS = {


@ -1,13 +1,21 @@
"""
Utility functions for `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ methods.
"""
import contextlib
import os
import sys
import numpy as np
import importlib.resources
try:
import jax
import jax.numpy as jnp
import numpyro
import numpyro.distributions as dist
import stan
import logging
import stan.common
DEPENDENCIES_INSTALLED = True
except ImportError:
@ -15,6 +23,7 @@ except ImportError:
jnp = None
numpyro = None
dist = None
stan = None
DEPENDENCIES_INSTALLED = False
@ -77,3 +86,71 @@ def sample_posterior(
rng_key = jax.random.PRNGKey(seed)
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled)
return mcmc.get_samples()
def load_stan_file():
return importlib.resources.files('quapy.method').joinpath('stan/pq.stan').read_text(encoding='utf-8')
@contextlib.contextmanager
def _suppress_stan_logging():
with open(os.devnull, "w") as devnull:
old_stderr = sys.stderr
sys.stderr = devnull
try:
yield
finally:
sys.stderr = old_stderr
def pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist, number_of_samples, num_warmup, stan_seed):
"""
Perform Bayesian prevalence estimation using a Stan model for probabilistic quantification.
This function builds and samples from a Stan model that implements a bin-based Bayesian
quantifier. It uses the class-conditional histograms of the classifier
outputs for positive and negative examples, along with the test histogram, to estimate
the posterior distribution of prevalence in the test set.
Parameters
----------
stan_code : str
The Stan model code as a string.
n_bins : int
Number of bins used to build the histograms for positive and negative examples.
pos_hist : array-like of shape (n_bins,)
Histogram counts of the classifier outputs for the positive class.
neg_hist : array-like of shape (n_bins,)
Histogram counts of the classifier outputs for the negative class.
test_hist : array-like of shape (n_bins,)
Histogram counts of the classifier outputs for the test set, binned using the same bins.
number_of_samples : int
Number of post-warmup samples to draw from the Stan posterior.
num_warmup : int
Number of warmup iterations for the sampler.
stan_seed : int
Random seed for Stan model compilation and sampling, ensuring reproducibility.
Returns
-------
prev_samples : numpy.ndarray
An array of posterior samples of the prevalence (`prev`) in the test set.
Each element corresponds to one draw from the posterior distribution.
"""
logging.getLogger("stan.common").setLevel(logging.ERROR)
stan_data = {
'n_bucket': n_bins,
'train_neg': neg_hist.tolist(),
'train_pos': pos_hist.tolist(),
'test': test_hist.tolist(),
'posterior': 1
}
with _suppress_stan_logging():
stan_model = stan.build(stan_code, data=stan_data, random_seed=stan_seed)
fit = stan_model.sample(num_chains=1, num_samples=number_of_samples,num_warmup=num_warmup)
return fit['prev']
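# a hedged usage sketch (assumes pystan is installed and that pos_hist, neg_hist and test_hist are
# numpy arrays of counts over the same n_bins bins of classifier scores):
#   stan_code = load_stan_file()
#   prev_samples = pq_stan(stan_code, n_bins=10, pos_hist=pos_hist, neg_hist=neg_hist, test_hist=test_hist,
#                          number_of_samples=1000, num_warmup=500, stan_seed=0)
#   prev_point = np.median(prev_samples)                             # posterior point estimate
#   prev_low, prev_high = np.quantile(prev_samples, [0.025, 0.975])  # 95% credible interval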


@ -15,9 +15,11 @@ class KDEBase:
"""
BANDWIDTH_METHOD = ['scott', 'silverman']
KERNELS = ['gaussian', 'aitchison', 'ilr']
@classmethod
def _check_bandwidth(cls, bandwidth):
def _check_bandwidth(cls, bandwidth, kernel):
"""
Checks that the bandwidth parameter is correct
@ -27,32 +29,56 @@ class KDEBase:
assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
if isinstance(bandwidth, float):
assert 0 < bandwidth < 1, \
"the bandwith for KDEy should be in (0,1), since this method models the unit simplex"
assert kernel!='gaussian' or (0 < bandwidth < 1), \
("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
"since this method models the unit simplex")
return bandwidth
def get_kde_function(self, X, bandwidth):
@classmethod
def _check_kernel(cls, kernel):
"""
Checks that the kernel parameter is correct
:param kernel: str
:return: the validated kernel
"""
assert kernel in KDEBase.KERNELS, f'unknown {kernel=}'
return kernel
def get_kde_function(self, X, bandwidth, kernel):
"""
Wraps the KDE function from scikit-learn.
:param X: data for which the density function is to be estimated
:param bandwidth: the bandwidth of the kernel
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
:return: a scikit-learn's KernelDensity object
"""
if kernel == 'aitchison':
X = self.clr_transform(X)
elif kernel == 'ilr':
X = self.ilr_transform(X)
return KernelDensity(bandwidth=bandwidth).fit(X)
def pdf(self, kde, X):
def pdf(self, kde, X, kernel):
"""
Wraps the density evaluation of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this
function returns :math:`e^{s}`
:param kde: a previously fit KDE function
:param X: the data for which the density is to be estimated
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
:return: np.ndarray with the densities
"""
if kernel == 'aitchison':
X = self.clr_transform(X)
elif kernel == 'ilr':
X = self.ilr_transform(X)
return np.exp(kde.score_samples(X))
def get_mixture_components(self, X, y, classes, bandwidth):
def get_mixture_components(self, X, y, classes, bandwidth, kernel):
"""
Returns an array containing the mixture components, i.e., the KDE functions for each class.
@ -60,6 +86,7 @@ class KDEBase:
:param y: the class labels
:param n_classes: integer, the number of classes
:param bandwidth: float, the bandwidth of the kernel
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
:return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates
"""
class_cond_X = []
@ -67,8 +94,19 @@ class KDEBase:
selX = X[y==cat]
if selX.size==0:
selX = [F.uniform_prevalence(len(classes))]
class_cond_X.append(selX)
return [self.get_kde_function(X_cond_yi, bandwidth) for X_cond_yi in class_cond_X]
return [self.get_kde_function(X_cond_yi, bandwidth, kernel) for X_cond_yi in class_cond_X]
def clr_transform(self, X):
if not hasattr(self, 'clr'):
self.clr = F.CLRtransformation()
return self.clr(X)
def ilr_transform(self, X):
if not hasattr(self, 'ilr'):
self.ilr = F.ILRtransformation()
return self.ilr(X)
class KDEyML(AggregativeSoftQuantifier, KDEBase):
@ -107,17 +145,19 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
for `k`); or as a tuple (X,y) defining the specific set of data to use for validation.
:param bandwidth: float, the bandwidth of the Kernel
:param kernel: kernel of KDE, valid ones are in KDEBase.KERNELS
:param random_state: a seed to be set before fitting any base quantifier (default None)
"""
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1,
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1, kernel='gaussian',
random_state=None):
super().__init__(classifier, fit_classifier, val_split)
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
self.kernel = self._check_kernel(kernel)
self.random_state=random_state
def aggregation_fit(self, classif_predictions, labels):
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth)
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
return self
def aggregate(self, posteriors: np.ndarray):
@ -131,10 +171,11 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
with qp.util.temp_seed(self.random_state):
epsilon = 1e-10
n_classes = len(self.mix_densities)
test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
def neg_loglikelihood(prev):
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
# test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
test_mixture_likelihood = prev @ test_densities
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
return -np.sum(test_loglikelihood)
@ -191,7 +232,7 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
super().__init__(classifier, fit_classifier, val_split)
self.divergence = divergence
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
self.random_state=random_state
self.montecarlo_trials = montecarlo_trials
@ -278,7 +319,7 @@ class KDEyCS(AggregativeSoftQuantifier):
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1):
super().__init__(classifier, fit_classifier, val_split)
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
def gram_matrix_mix_sum(self, X, Y=None):
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))


@ -673,7 +673,7 @@ class PACC(AggregativeSoftQuantifier):
class EMQ(AggregativeSoftQuantifier):
"""
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm.
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, or `Maximum Likelihood Label Shift` (MLLS).
EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior
probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
maximum-likelihood estimation, in a mutually recursive way, until convergence.


@ -1,19 +1,21 @@
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator
from sklearn.metrics import confusion_matrix
import quapy as qp
import quapy.functional as F
from quapy.functional import CompositionalTransformation, CLRtransformation, ILRtransformation
from quapy.method import _bayesian
from quapy.method.aggregative import AggregativeCrispQuantifier
from quapy.data import LabelledCollection
from quapy.method.aggregative import AggregativeQuantifier
from quapy.method.aggregative import AggregativeQuantifier, AggregativeCrispQuantifier, AggregativeSoftQuantifier, BinaryAggregativeQuantifier
from scipy.stats import chi2
from sklearn.utils import resample
from abc import ABC, abstractmethod
from scipy.special import softmax, factorial
from scipy.special import factorial
import copy
from functools import lru_cache
from tqdm import tqdm
"""
This module provides implementation of different types of confidence regions, and the implementation of Bootstrap
@ -80,6 +82,58 @@ class ConfidenceRegionABC(ABC):
proportion = np.clip(self.coverage(uniform_simplex), 0., 1.)
return proportion
@property
@abstractmethod
def samples(self):
""" Returns internal samples """
...
def __contains__(self, p):
"""
Overloads in operator, checks if `p` is contained in the region
:param p: array-like
:return: boolean
"""
p = np.asarray(p)
assert p.ndim==1, f'unexpected shape for point parameter'
return self.coverage(p)==1.
def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
"""
Finds the closest point to p that belongs to the region. Assumes the region is convex.
:param p: array-like, the point
:param tol: float, error tolerance
:param max_iter: int, max number of iterations
:returns: array-like, the closest point to p that belongs to the region, found on the segment between p and
the center of the region
"""
p = np.asarray(p, dtype=float)
# if p in region, returns p itself
if p in self:
return p.copy()
# center of the region
c = self.point_estimate()
# binary search in [0,1], interpolation parameter
# low=closest to p, high=closest to c
low, high = 0.0, 1.0
for _ in range(max_iter):
mid = 0.5 * (low + high)
x = p*(1-mid) + c*mid
if x in self:
high = mid
else:
low = mid
if high - low < tol:
break
in_boundary = p*(1-high) + c*high
return in_boundary
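A self-contained sketch of the same bisection idea for a generic convex region, assuming only a membership test and a center point (illustrative helper, not part of the diff):
import numpy as np
def closest_point_in_convex_region(p, center, contains, tol=1e-6, max_iter=30):
    # bisection on the segment p -> center: the smallest interpolation factor whose
    # point lies inside the (convex) region approximates the boundary point
    p, center = np.asarray(p, float), np.asarray(center, float)
    if contains(p):
        return p.copy()
    low, high = 0.0, 1.0
    for _ in range(max_iter):
        mid = 0.5 * (low + high)
        if contains(p * (1 - mid) + center * mid):
            high = mid
        else:
            low = mid
        if high - low < tol:
            break
    return p * (1 - high) + center * high
# example: a unit ball centered at the origin
inside_ball = lambda x: np.linalg.norm(x) <= 1.0
print(closest_point_in_convex_region([2.0, 0.0], [0.0, 0.0], inside_ball))  # ~[1, 0]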
class WithConfidenceABC(ABC):
"""
@ -88,20 +142,32 @@ class WithConfidenceABC(ABC):
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
@abstractmethod
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
"""
Adds the method `quantify_conf` to the interface. This method returns not only the point-estimate, but
Adds the method `predict_conf` to the interface. This method returns not only the point-estimate, but
also the confidence region around it.
:param instances: a np.ndarray of shape (n_instances, n_features,)
:confidence_level: float in (0, 1)
:param confidence_level: float in (0, 1), default is 0.95
:return: a tuple (`point_estimate`, `conf_region`), where `point_estimate` is a np.ndarray of shape
(n_classes,) and `conf_region` is an object from :class:`ConfidenceRegionABC`
"""
...
def quantify_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
"""
Alias to `predict_conf`. This method returns not only the point-estimate, but
also the confidence region around it.
:param instances: a np.ndarray of shape (n_instances, n_features,)
:param confidence_level: float in (0, 1), default is 0.95
:return: a tuple (`point_estimate`, `conf_region`), where `point_estimate` is a np.ndarray of shape
(n_classes,) and `conf_region` is an object from :class:`ConfidenceRegionABC`
"""
return self.predict_conf(instances=instances, confidence_level=confidence_level)
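A hypothetical usage sketch of the renamed `predict_conf` API, assuming the class paths and the `fit(X, y)` signature used elsewhere in this branch (synthetic data, illustrative only):
import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC
from quapy.method.confidence import AggregativeBootstrap
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
y = (X[:, 0] + rng.normal(scale=0.5, size=1000) > 0).astype(int)
quant = AggregativeBootstrap(PACC(LogisticRegression()), n_test_samples=500, region='intervals')
quant.fit(X, y)
prev_hat, conf_region = quant.predict_conf(X)   # point estimate + ConfidenceRegionABC instance
print(prev_hat, conf_region)                    # quantify_conf(...) remains available as an alias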
@classmethod
def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals'):
def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals')->ConfidenceRegionABC:
"""
Construct a confidence region given many prevalence estimations.
@ -136,7 +202,7 @@ def simplex_volume(n):
return 1 / factorial(n)
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical):
"""
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
at a distance `chi2_critical`.
@ -169,110 +235,125 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
return within_elipse * 1.0
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
"""
Instantiates a Confidence Ellipse in the probability simplex.
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
at a distance `chi2_critical`.
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
:param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,)
:param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse
:param prec_matrix: a np.ndarray with the precision matrix (inverse of the
covariance matrix) of the ellipse. If this inverse cannot be computed
then None must be passed
:param chi2_critical: float, the chi2 critical value
:return: float in [0,1], the fraction of values that are contained in the ellipse
defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance).
If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned.
"""
if prec_matrix is None:
return 0.
values = np.atleast_2d(values)
diff = values - mean
d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1)
within_ellipse = d_M_squared <= chi2_critical
if len(within_ellipse) == 1:
return float(within_ellipse[0])
else:
return float(np.mean(within_ellipse))
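A short numeric check of the Mahalanobis containment test above, on pseudo-bootstrap prevalence vectors (assumption: the samples are roughly Gaussian, so the empirical coverage lands near the nominal level):
import numpy as np
from scipy.stats import chi2
rng = np.random.default_rng(0)
samples = rng.dirichlet([5, 3, 2], size=1000)             # stand-in for bootstrap estimates
mean = samples.mean(axis=0)
prec = np.linalg.pinv(np.cov(samples, rowvar=False, ddof=1))
critical = chi2.ppf(0.95, df=samples.shape[1] - 1)        # ddof = n_classes - 1 on the simplex
diff = samples - mean
d2 = np.sum(diff @ prec * diff, axis=-1)                  # squared Mahalanobis distances
print(np.mean(d2 <= critical))                            # empirical coverage, close to 0.95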
def closest_point_on_ellipsoid(p, mean, cov, chi2_critical, tol=1e-9, max_iter=100):
"""
Computes the closest point on the ellipsoid defined by:
(x - mean)^T cov^{-1} (x - mean) = chi2_critical
"""
def __init__(self, X, confidence_level=0.95):
p = np.asarray(p)
mean = np.asarray(mean)
Sigma = np.asarray(cov)
assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
# Precompute precision matrix
P = np.linalg.pinv(Sigma)
d = P.shape[0]
X = np.asarray(X)
# Define v = p - mean
v = p - mean
self.mean_ = X.mean(axis=0)
self.cov_ = np.cov(X, rowvar=False, ddof=1)
# If p is inside the ellipsoid, return p itself
M_dist = v @ P @ v
if M_dist <= chi2_critical:
return p.copy()
try:
self.precision_matrix_ = np.linalg.inv(self.cov_)
except:
self.precision_matrix_ = None
# Function to compute x(lambda)
def x_lambda(lmbda):
A = np.eye(d) + lmbda * P
return mean + np.linalg.solve(A, v)
self.dim = X.shape[-1]
self.ddof = self.dim - 1
# Function whose root we want: f(lambda) = Mahalanobis distance - chi2
def f(lmbda):
x = x_lambda(lmbda)
diff = x - mean
return diff @ P @ diff - chi2_critical
# critical chi-square value
self.confidence_level = confidence_level
self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
# Bisection search over lambda >= 0
l_low, l_high = 0.0, 1.0
def point_estimate(self):
"""
Returns the point estimate, the center of the ellipse.
# Increase high until f(high) < 0
while f(l_high) > 0:
l_high *= 2
if l_high > 1e12:
raise RuntimeError("Failed to bracket the root.")
:return: np.ndarray of shape (n_classes,)
"""
return self.mean_
# Bisection
for _ in range(max_iter):
l_mid = 0.5 * (l_low + l_high)
fm = f(l_mid)
if abs(fm) < tol:
break
if fm > 0:
l_low = l_mid
else:
l_high = l_mid
def coverage(self, true_value):
"""
Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
fraction of these that are contained in the region if more than one value is passed. If only one value is
passed, it returns either 1.0 or 0.0, indicating whether or not the value lies in the region.
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
class ConfidenceEllipseCLR(ConfidenceRegionABC):
"""
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, X, confidence_level=0.95):
self.clr = CLRtransformation()
Z = self.clr(X)
self.mean_ = np.mean(X, axis=0)
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
def point_estimate(self):
"""
Returns the point estimate, the center of the ellipse.
:return: np.ndarray of shape (n_classes,)
"""
# The inverse of the CLR does not coincide with the true mean, because the geometric mean
# requires smoothing the prevalence vectors and this affects the softmax (inverse);
# return self.clr.inverse(self.mean_) # <- does not coincide
return self.mean_
def coverage(self, true_value):
"""
Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
fraction of these that are contained in the region if more than one value is passed. If only one value is
passed, it returns either 1.0 or 0.0, indicating whether or not the value lies in the region.
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
transformed_values = self.clr(true_value)
return self.conf_region_clr.coverage(transformed_values)
l_opt = l_mid
return x_lambda(l_opt)
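A quick sanity check of the projection helper added above (assuming closest_point_on_ellipsoid is in scope; numbers are illustrative): for a point outside the ellipsoid, the returned point should land on its boundary.
import numpy as np
from scipy.stats import chi2
mean = np.array([0.3, 0.3, 0.4])
cov = np.diag([0.010, 0.020, 0.015])
crit = chi2.ppf(0.95, df=2)
p = np.array([0.90, 0.05, 0.05])
x = closest_point_on_ellipsoid(p, mean, cov, crit)
d = x - mean
print(d @ np.linalg.pinv(cov) @ d, crit)   # Mahalanobis distance of x matches chi2_critical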
class ConfidenceIntervals(ConfidenceRegionABC):
"""
Instantiates a region based on (independent) Confidence Intervals.
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
is applied to the significance level (`alpha`) before computing confidence intervals.
The correction consists of replacing `alpha` with `alpha/n_classes`. When
`n_classes=2` the correction is not applied because there is only one verification test
since the other class is constrained. This is not necessarily true for n_classes>2.
"""
def __init__(self, X, confidence_level=0.95):
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
X = np.asarray(X)
samples = np.asarray(samples)
self.means_ = X.mean(axis=0)
self.means_ = samples.mean(axis=0)
alpha = 1-confidence_level
if bonferroni_correction:
n_classes = samples.shape[-1]
if n_classes>2:
alpha = alpha/n_classes
low_perc = (alpha/2.)*100
high_perc = (1-alpha/2.)*100
self.I_low, self.I_high = np.percentile(X, q=[low_perc, high_perc], axis=0)
self.I_low, self.I_high = np.percentile(samples, q=[low_perc, high_perc], axis=0)
self._samples = samples
self.alpha = alpha
@property
def samples(self):
return self._samples
def point_estimate(self):
"""
@ -297,33 +378,174 @@ class ConfidenceIntervals(ConfidenceRegionABC):
return proportion
def __repr__(self):
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
class CLRtransformation:
@property
def n_dim(self):
return len(self.I_low)
def winkler_scores(self, true_prev):
true_prev = np.asarray(true_prev)
assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
assert len(true_prev)==self.n_dim, \
f'unexpected number of values; found {len(true_prev)}, expected {self.n_dim}'
def winkler_score(low, high, true_val, alpha):
amp = high-low
scale_cost = 1./alpha
cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
return amp + scale_cost*cost
return np.asarray(
[winkler_score(low_i, high_i, true_v, self.alpha)
for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
)
def mean_winkler_score(self, true_prev):
return np.mean(self.winkler_scores(true_prev))
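A worked example of the Winkler (interval) score computed above: the score equals the interval width when the true value is covered, and grows by the miss divided by alpha otherwise (numbers are illustrative):
low, high, alpha = 0.20, 0.40, 0.05
for true_val in (0.30, 0.45):
    cost = max(0.0, low - true_val) + max(0.0, true_val - high)
    print(true_val, (high - low) + cost / alpha)   # 0.30 -> 0.20 ; 0.45 -> 0.20 + 0.05/0.05 = 1.20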
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
"""
Centered log-ratio, from component analysis
Instantiates a Confidence Ellipse in the probability simplex.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __call__(self, X, epsilon=1e-6):
"""
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:param epsilon: small float for prevalence smoothing
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
"""
X = np.asarray(X)
X = qp.error.smooth(X, epsilon)
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
return np.log(X / G)
def __init__(self, samples, confidence_level=0.95):
def inverse(self, X):
"""
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
samples = np.asarray(samples)
self.mean_ = samples.mean(axis=0)
self.cov_ = np.cov(samples, rowvar=False, ddof=1)
try:
self.precision_matrix_ = np.linalg.pinv(self.cov_)
except:
self.precision_matrix_ = None
self.dim = samples.shape[-1]
self.ddof = self.dim - 1
# critical chi-square value
self.confidence_level = confidence_level
self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
self._samples = samples
self.alpha = 1.-confidence_level
@property
def samples(self):
return self._samples
def point_estimate(self):
"""
return softmax(X, axis=-1)
Returns the point estimate, the center of the ellipse.
:return: np.ndarray of shape (n_classes,)
"""
return self.mean_
def coverage(self, true_value):
"""
Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
fraction of these that are contained in the region if more than one value is passed. If only one value is
passed, it returns either 1.0 or 0.0, indicating whether or not the value lies in the region.
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
return closest_point_on_ellipsoid(
p,
mean=self.mean_,
cov=self.cov_,
chi2_critical=self.chi2_critical_
)
class ConfidenceEllipseTransformed(ConfidenceRegionABC):
"""
Instantiates a Confidence Ellipse in a transformed space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
samples = np.asarray(samples)
self.transformation = transformation
Z = self.transformation(samples)
# self.mean_ = np.mean(samples, axis=0)
self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
self._samples = samples
self.alpha = 1.-confidence_level
@property
def samples(self):
return self._samples
def point_estimate(self):
"""
Returns the point estimate, the center of the ellipse.
:return: np.ndarray of shape (n_classes,)
"""
# The inverse of the CLR does not coincide with the true mean, because the geometric mean
# requires smoothing the prevalence vectors and this affects the softmax (inverse);
# return self.clr.inverse(self.mean_) # <- does not coincide
return self.mean_
def coverage(self, true_value):
"""
Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
fraction of these that are contained in the region if more than one value is passed. If only one value is
passed, it returns either 1.0 or 0.0, indicating whether or not the value lies in the region.
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
:return: float in [0,1]
"""
transformed_values = self.transformation(true_value)
return self.conf_region_z.coverage(transformed_values)
def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
p_prime = self.transformation(p)
b_prime = self.conf_region_z.closest_point_in_region(p_prime, tol=tol, max_iter=max_iter)
b = self.transformation.inverse(b_prime)
return b
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
"""
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, confidence_level=0.95):
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)
class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
"""
Instantiates a Confidence Ellipse in the Isometric Log-Ratio (ILR) space.
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
:param confidence_level: float, the confidence level (default 0.95)
"""
def __init__(self, samples, confidence_level=0.95):
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
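A small sketch comparing the region types on the same pseudo-bootstrap samples, assuming the classes live in quapy.method.confidence as imported elsewhere in this branch:
import numpy as np
from quapy.method.confidence import (ConfidenceIntervals, ConfidenceEllipseSimplex,
                                     ConfidenceEllipseCLR, ConfidenceEllipseILR)
rng = np.random.default_rng(0)
prevs = rng.dirichlet([8, 5, 2], size=500)   # stand-in for bootstrap prevalence estimates
for region in (ConfidenceIntervals(prevs),
               ConfidenceEllipseSimplex(prevs),
               ConfidenceEllipseCLR(prevs),
               ConfidenceEllipseILR(prevs)):
    print(type(region).__name__, region.point_estimate(), region.coverage(prevs))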
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
@ -339,6 +561,12 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
During inference, the bootstrap repetitions are applied to the pre-classified test instances.
See
`Moreo, A., Salvati, N.
An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.
Learning to Quantify: Methods and Applications (LQ 2025), co-located at ECML-PKDD 2025.
pp 12-33 <https://lq-2025.github.io/proceedings/CompleteVolume.pdf>`_
:param quantifier: an aggregative quantifier
:param n_train_samples: int, the number of training resamplings (defaults to 1, set to > 1 to activate a
model-based bootstrap approach)
@ -357,7 +585,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
n_test_samples=500,
confidence_level=0.95,
region='intervals',
random_state=None):
random_state=None,
verbose=False):
assert isinstance(quantifier, AggregativeQuantifier), \
f'base quantifier does not seem to be an instance of {AggregativeQuantifier.__name__}'
@ -374,6 +603,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
self.confidence_level = confidence_level
self.region = region
self.random_state = random_state
self.verbose = verbose
def aggregation_fit(self, classif_predictions, labels):
data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
@ -399,6 +629,24 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
prev_mean, self.confidence = self.aggregate_conf(classif_predictions)
return prev_mean
def aggregate_conf_sequential__(self, classif_predictions: np.ndarray, confidence_level=None):
if confidence_level is None:
confidence_level = self.confidence_level
n_samples = classif_predictions.shape[0]
prevs = []
with qp.util.temp_seed(self.random_state):
for quantifier in self.quantifiers:
for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
sample_i = resample(classif_predictions, n_samples=n_samples)
prev_i = quantifier.aggregate(sample_i)
prevs.append(prev_i)
conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
prev_estim = conf.point_estimate()
return prev_estim, conf
def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
if confidence_level is None:
confidence_level = self.confidence_level
@ -407,10 +655,15 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
prevs = []
with qp.util.temp_seed(self.random_state):
for quantifier in self.quantifiers:
for i in range(self.n_test_samples):
sample_i = resample(classif_predictions, n_samples=n_samples)
prev_i = quantifier.aggregate(sample_i)
prevs.append(prev_i)
results = Parallel(n_jobs=-1)(
delayed(bootstrap_once)(i, classif_predictions, quantifier, n_samples)
for i in range(self.n_test_samples)
)
prevs.extend(results)
# for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
# sample_i = resample(classif_predictions, n_samples=n_samples)
# prev_i = quantifier.aggregate(sample_i)
# prevs.append(prev_i)
conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
prev_estim = conf.point_estimate()
@ -423,7 +676,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
self.aggregation_fit(classif_predictions, labels)
return self
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
predictions = self.quantifier.classify(instances)
return self.aggregate_conf(predictions, confidence_level=confidence_level)
@ -435,9 +688,16 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
return self.quantifier._classifier_method()
def bootstrap_once(i, classif_predictions, quantifier, n_samples):
idx = np.random.randint(0, len(classif_predictions), n_samples)
sample = classif_predictions[idx]
prev = quantifier.aggregate(sample)
return prev
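The parallel path above swaps the sequential resampling loop for joblib; a generic sketch of that pattern (note: each job below is seeded explicitly for reproducibility, which is an assumption, not something the diff does):
import numpy as np
from joblib import Parallel, delayed
def one_resample(i, predictions, n_samples, base_seed=0):
    rng = np.random.default_rng(base_seed + i)    # per-job seeding
    idx = rng.integers(0, len(predictions), n_samples)
    return predictions[idx].mean(axis=0)          # stand-in for quantifier.aggregate(sample)
predictions = np.random.default_rng(0).dirichlet([1, 1, 1], size=200)
prevs = Parallel(n_jobs=-1)(
    delayed(one_resample)(i, predictions, len(predictions)) for i in range(100)
)
print(np.mean(prevs, axis=0))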
class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
"""
`Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method,
`Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method (by Albert Ziegler and Paweł Czyż),
which is a variant of :class:`ACC` that calculates the posterior probability distribution
over the prevalence vectors, rather than providing a point estimate obtained
by matrix inversion.
@ -543,9 +803,115 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
samples = self.sample_from_posterior(classif_predictions)[_bayesian.P_TEST_Y]
return np.asarray(samples.mean(axis=0), dtype=float)
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
if confidence_level is None:
confidence_level = self.confidence_level
classif_predictions = self.classify(instances)
point_estimate = self.aggregate(classif_predictions)
samples = self.get_prevalence_samples() # available after calling "aggregate" function
region = WithConfidenceABC.construct_region(samples, confidence_level=self.confidence_level, method=self.region)
region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
return point_estimate, region
class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
"""
`Precise Quantifier: Bayesian distribution matching quantifier <https://arxiv.org/abs/2507.06061>`_,
which is a variant of :class:`HDy` that calculates the posterior probability distribution
over the prevalence vectors, rather than providing a point estimate.
This method relies on extra dependencies, which have to be installed via:
`$ pip install quapy[bayes]`
:param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken to be
the one indicated in `qp.environ['DEFAULT_CLS']`
:param val_split: specifies the data used for generating classifier predictions. This specification
can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
be extracted from the training set; or as an integer (default 5), indicating that the predictions
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
for `k`); or as a tuple `(X,y)` defining the specific set of data to use for validation. Set to
None when the method does not require any validation data, in order to avoid that some portion of
the training data be wasted.
:param nbins: int, number of bins used to build the histogram representation of the posteriors (default 4)
:param fixed_bins: bool, if True the bin limits are uniformly spaced in [0,1]; if False (default) they are
set at the quantiles of the validation posteriors
:param num_warmup: number of warmup iterations for the STAN sampler (default 500)
:param num_samples: number of samples to draw from the posterior (default 1000)
:param stan_seed: random seed for the STAN sampler (default 0)
:param confidence_level: float, a value in (0,1) reflecting the desired confidence level (default 0.95)
:param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
"""
def __init__(self,
classifier: BaseEstimator=None,
fit_classifier=True,
val_split: int = 5,
nbins: int = 4,
fixed_bins: bool = False,
num_warmup: int = 500,
num_samples: int = 1_000,
stan_seed: int = 0,
confidence_level: float = 0.95,
region: str = 'intervals'):
if num_warmup <= 0:
raise ValueError(f'parameter {num_warmup=} must be a positive integer')
if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer')
if not _bayesian.DEPENDENCIES_INSTALLED:
raise ImportError("Auxiliary dependencies are required. "
"Run `$ pip install quapy[bayes]` to install them.")
super().__init__(classifier, fit_classifier, val_split)
self.nbins = nbins
self.fixed_bins = fixed_bins
self.num_warmup = num_warmup
self.num_samples = num_samples
self.stan_seed = stan_seed
self.stan_code = _bayesian.load_stan_file()
self.confidence_level = confidence_level
self.region = region
def aggregation_fit(self, classif_predictions, labels):
y_pred = classif_predictions[:, self.pos_label]
# Compute bin limits
if self.fixed_bins:
# Uniform bins in [0,1]
self.bin_limits = np.linspace(0, 1, self.nbins + 1)
else:
# Quantile bins
self.bin_limits = np.quantile(y_pred, np.linspace(0, 1, self.nbins + 1))
# Assign each prediction to a bin
bin_indices = np.digitize(y_pred, self.bin_limits[1:-1], right=True)
# Positive and negative masks
pos_mask = (labels == self.pos_label)
neg_mask = ~pos_mask
# Count positives and negatives per bin
self.pos_hist = np.bincount(bin_indices[pos_mask], minlength=self.nbins)
self.neg_hist = np.bincount(bin_indices[neg_mask], minlength=self.nbins)
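A compact sketch of the quantile binning performed in aggregation_fit (synthetic posteriors and labels, nbins=4):
import numpy as np
rng = np.random.default_rng(0)
y_pred = rng.random(1000)                                  # stand-in for P(y=+|x) on validation data
labels = (rng.random(1000) < y_pred).astype(int)
bin_limits = np.quantile(y_pred, np.linspace(0, 1, 5))     # 4 quantile bins
bin_indices = np.digitize(y_pred, bin_limits[1:-1], right=True)
pos_hist = np.bincount(bin_indices[labels == 1], minlength=4)
neg_hist = np.bincount(bin_indices[labels == 0], minlength=4)
print(pos_hist, neg_hist)                                  # per-bin counts passed to the Stan model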
def aggregate(self, classif_predictions):
Px_test = classif_predictions[:, self.pos_label]
test_hist, _ = np.histogram(Px_test, bins=self.bin_limits)
prevs = _bayesian.pq_stan(
self.stan_code, self.nbins, self.pos_hist, self.neg_hist, test_hist,
self.num_samples, self.num_warmup, self.stan_seed
).flatten()
self.prev_distribution = np.vstack([1-prevs, prevs]).T
return self.prev_distribution.mean(axis=0)
def aggregate_conf(self, predictions, confidence_level=None):
if confidence_level is None:
confidence_level = self.confidence_level
point_estimate = self.aggregate(predictions)
samples = self.prev_distribution
region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
return point_estimate, region
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
predictions = self.classify(instances)
return self.aggregate_conf(predictions, confidence_level=confidence_level)


@ -1,11 +1,17 @@
from typing import Union, Callable
from itertools import product
from tqdm import tqdm
from typing import Union, Callable, Counter
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.utils import resample
from sklearn.preprocessing import normalize
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
from quapy.functional import get_divergence
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier, BinaryQuantifier
import quapy.functional as F
from scipy.optimize import lsq_linear
from scipy import sparse
class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
@ -149,53 +155,164 @@ class DMx(BaseQuantifier):
return F.argmin_prevalence(loss, n_classes, method=self.search)
# class ReadMe(BaseQuantifier):
#
# def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
# raise NotImplementedError('under development ...')
# self.bootstrap_trials = bootstrap_trials
# self.bootstrap_range = bootstrap_range
# self.bagging_trials = bagging_trials
# self.bagging_range = bagging_range
# self.vectorizer_kwargs = vectorizer_kwargs
#
# def fit(self, data: LabelledCollection):
# X, y = data.Xy
# self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
# X = self.vectorizer.fit_transform(X)
# self.class_conditional_X = {i: X[y==i] for i in range(data.classes_)}
#
# def predict(self, X):
# X = self.vectorizer.transform(X)
#
# # number of features
# num_docs, num_feats = X.shape
#
# # bootstrap
# p_boots = []
# for _ in range(self.bootstrap_trials):
# docs_idx = np.random.choice(num_docs, size=self.bootstra_range, replace=False)
# class_conditional_X = {i: X[docs_idx] for i, X in self.class_conditional_X.items()}
# Xboot = X[docs_idx]
#
# # bagging
# p_bags = []
# for _ in range(self.bagging_trials):
# feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
# class_conditional_Xbag = {i: X[:, feat_idx] for i, X in class_conditional_X.items()}
# Xbag = Xboot[:,feat_idx]
# p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
# p_bags.append(p)
# p_boots.append(np.mean(p_bags, axis=0))
#
# p_mean = np.mean(p_boots, axis=0)
# p_std = np.std(p_bags, axis=0)
#
# return p_mean
#
#
# def std_constrained_linear_ls(self, X, class_cond_X: dict):
# pass
class ReadMe(BaseQuantifier, WithConfidenceABC):
"""
ReadMe is a non-aggregative quantification system proposed by
`Daniel Hopkins and Gary King, 2007. A method of automated nonparametric content analysis for
social science. American Journal of Political Science, 54(1):229-247.
<https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1540-5907.2009.00428.x>`_.
The idea is to estimate `Q(Y=i)` directly from:
:math:`Q(X)=\\sum_{i=1}^{n} Q(X|Y=i) Q(Y=i)`
via least-squares regression, i.e., without incurring the cost of computing posterior probabilities.
However, this poses a very difficult estimation problem, in which the vector `Q(X)` and the matrix `Q(X|Y=i)`
can be of very high dimensionality. In order to render the problem tractable, ReadMe performs bagging in
the feature space. ReadMe also combines bagging with bootstrap in order to derive confidence intervals
around the point estimates.
We use the same default parameters as in the official
`R implementation <https://github.com/iqss-research/ReadMeV1/blob/master/R/prototype.R>`_.
:param prob_model: str ('naive', or 'full'), selects the modality in which the probabilities `Q(X)` and
`Q(X|Y)` are to be modelled. Options include "full", which corresponds to the original formulation of
ReadMe, in which X is constrained to be a binary matrix (e.g., of term presence/absence) and in which
`Q(X)` and `Q(X|Y)` are modelled, respectively, as matrices of `(2^K, 1)` and `(2^K, n)` values, where
`K` is the number of columns in the data matrix (i.e., `bagging_range`), and `n` is the number of classes.
Of course, this approach is computationally prohibitive for large `K`, so the computation is restricted to data
matrices with `K<=25` (although we recommend even smaller values of `K`). A much faster model is "naive", which
considers `Q(X)` and `Q(X|Y)` to be multinomial distributions under the `bag-of-words` perspective. In this
case, `bagging_range` can be set to much larger values. Default is "full" (i.e., original ReadMe behavior).
:param bootstrap_trials: int, number of bootstrap trials (default 300)
:param bagging_trials: int, number of bagging trials (default 300)
:param bagging_range: int, number of features to keep for each bagging trial (default 15)
:param confidence_level: float, a value in (0,1) reflecting the desired confidence level (default 0.95)
:param region: str in 'intervals', 'ellipse', 'ellipse-clr'; indicates the preferred method for
defining the confidence region (see :class:`WithConfidenceABC`)
:param random_state: int or None, allows replicability (default None)
:param verbose: bool, whether to display information during the process (default False)
"""
MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION = 25
PROBABILISTIC_MODELS = ["naive", "full"]
def __init__(self,
prob_model="full",
bootstrap_trials=300,
bagging_trials=300,
bagging_range=15,
confidence_level=0.95,
region='intervals',
random_state=None,
verbose=False):
assert prob_model in ReadMe.PROBABILISTIC_MODELS, \
f'unknown {prob_model=}, valid ones are {ReadMe.PROBABILISTIC_MODELS=}'
self.prob_model = prob_model
self.bootstrap_trials = bootstrap_trials
self.bagging_trials = bagging_trials
self.bagging_range = bagging_range
self.confidence_level = confidence_level
self.region = region
self.random_state = random_state
self.verbose = verbose
def fit(self, X, y):
self._check_matrix(X)
self.rng = np.random.default_rng(self.random_state)
self.classes_ = np.unique(y)
Xsize = X.shape[0]
# Bootstrap loop
self.Xboots, self.yboots = [], []
for _ in range(self.bootstrap_trials):
idx = self.rng.choice(Xsize, size=Xsize, replace=True)
self.Xboots.append(X[idx])
self.yboots.append(y[idx])
return self
def predict_conf(self, X, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
self._check_matrix(X)
n_features = X.shape[1]
boots_prevalences = []
for Xboots, yboots in tqdm(
zip(self.Xboots, self.yboots),
desc='bootstrap predictions', total=self.bootstrap_trials, disable=not self.verbose
):
bagging_estimates = []
for _ in range(self.bagging_trials):
feat_idx = self.rng.choice(n_features, size=self.bagging_range, replace=False)
Xboots_bagging = Xboots[:, feat_idx]
X_boots_bagging = X[:, feat_idx]
bagging_prev = self._quantify_iteration(Xboots_bagging, yboots, X_boots_bagging)
bagging_estimates.append(bagging_prev)
boots_prevalences.append(np.mean(bagging_estimates, axis=0))
conf = WithConfidenceABC.construct_region(boots_prevalences, confidence_level, method=self.region)
prev_estim = conf.point_estimate()
return prev_estim, conf
def predict(self, X):
prev_estim, _ = self.predict_conf(X)
return prev_estim
def _quantify_iteration(self, Xtr, ytr, Xte):
"""Single ReadMe estimate."""
PX_given_Y = np.asarray([self._compute_P(Xtr[ytr == c]) for i,c in enumerate(self.classes_)])
PX = self._compute_P(Xte)
res = lsq_linear(A=PX_given_Y.T, b=PX, bounds=(0, 1))
pY = np.maximum(res.x, 0)
return pY / pY.sum()
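The least-squares step above is the core of a single ReadMe estimate; a toy example (made-up distributions) showing that lsq_linear recovers the mixing prevalences:
import numpy as np
from scipy.optimize import lsq_linear
PX_given_Y = np.array([[0.6, 0.3, 0.1],    # class 0: distribution over 3 feature profiles
                       [0.2, 0.3, 0.5]])   # class 1
true_p = np.array([0.7, 0.3])
PX = true_p @ PX_given_Y                   # observed unconditional distribution
res = lsq_linear(A=PX_given_Y.T, b=PX, bounds=(0, 1))
p = np.maximum(res.x, 0)
print(p / p.sum())                         # recovers ~[0.7, 0.3]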
def _check_matrix(self, X):
"""the "full" model requires estimating empirical distributions; due to the high computational cost,
this function is only made available for binary matrices"""
if self.prob_model == 'full' and not self._is_binary_matrix(X):
raise ValueError('the empirical distribution can only be computed efficiently on binary matrices')
def _is_binary_matrix(self, X):
data = X.data if sparse.issparse(X) else X
return np.all((data == 0) | (data == 1))
def _compute_P(self, X):
if self.prob_model == 'naive':
return self._multinomial_distribution(X)
elif self.prob_model == 'full':
return self._empirical_distribution(X)
else:
raise ValueError(f'unknown {self.prob_model}; valid ones are {ReadMe.PROBABILISTIC_MODELS=}')
def _empirical_distribution(self, X):
if X.shape[1] > self.MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION:
raise ValueError(f'the empirical distribution can only be computed efficiently for dimensions '
f'less or equal than {self.MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION}')
# we convert every binary row (e.g., 0 0 1 0 1) into the equivalent number (e.g., 5)
K = X.shape[1]
binary_powers = 1 << np.arange(K-1, -1, -1) # (2^(K-1), ..., 8, 4, 2, 1)
X_as_binary_numbers = X @ binary_powers
# count occurrences and compute probs
counts = np.bincount(X_as_binary_numbers, minlength=2 ** K).astype(float)
probs = counts / counts.sum()
return probs
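A tiny worked example of the binary-row encoding used above (K = 3 features, illustrative data):
import numpy as np
X = np.array([[0, 0, 1],
              [0, 1, 0],
              [0, 0, 1]])
K = X.shape[1]
binary_powers = 1 << np.arange(K - 1, -1, -1)    # [4, 2, 1]
codes = X @ binary_powers                        # [1, 2, 1]
counts = np.bincount(codes, minlength=2 ** K).astype(float)
print(counts / counts.sum())                     # empirical distribution over the 2^K profiles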
def _multinomial_distribution(self, X):
PX = np.asarray(X.sum(axis=0))
PX = normalize(PX, norm='l1', axis=1)
return PX.ravel()
def _get_features_range(X):

quapy/method/stan/pq.stan (new file, 39 lines)

@ -0,0 +1,39 @@
data {
int<lower=0> n_bucket;
array[n_bucket] int<lower=0> train_pos;
array[n_bucket] int<lower=0> train_neg;
array[n_bucket] int<lower=0> test;
int<lower=0,upper=1> posterior;
}
transformed data{
row_vector<lower=0>[n_bucket] train_pos_rv;
row_vector<lower=0>[n_bucket] train_neg_rv;
row_vector<lower=0>[n_bucket] test_rv;
real n_test;
train_pos_rv = to_row_vector( train_pos );
train_neg_rv = to_row_vector( train_neg );
test_rv = to_row_vector( test );
n_test = sum( test );
}
parameters {
simplex[n_bucket] p_neg;
simplex[n_bucket] p_pos;
real<lower=0,upper=1> prev_prior;
}
model {
if( posterior ) {
target += train_neg_rv * log( p_neg );
target += train_pos_rv * log( p_pos );
target += test_rv * log( p_neg * ( 1 - prev_prior) + p_pos * prev_prior );
}
}
generated quantities {
real<lower=0,upper=1> prev;
prev = sum( binomial_rng(test, 1 / ( 1 + (p_neg./p_pos) *(1-prev_prior)/prev_prior ) ) ) / n_test;
}


@ -410,7 +410,7 @@ def group_params(param_grid: dict):
"""
classifier_params, quantifier_params = {}, {}
for key, values in param_grid.items():
if key.startswith('classifier__') or key == 'val_split':
if 'classifier__' in key or key == 'val_split':
classifier_params[key] = values
else:
quantifier_params[key] = values
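The relaxed check above also routes nested keys such as `quantifier__classifier__C` to the classifier grid, not only keys starting with `classifier__`; a quick illustration on a hypothetical grid:
param_grid = {
    'classifier__C': [1, 10],
    'quantifier__classifier__C': [1, 10],
    'bandwidth': [0.05, 0.1],
}
classifier_params = {k: v for k, v in param_grid.items() if 'classifier__' in k or k == 'val_split'}
quantifier_params = {k: v for k, v in param_grid.items() if k not in classifier_params}
print(classifier_params)   # both classifier keys
print(quantifier_params)   # {'bandwidth': [0.05, 0.1]}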


@ -80,7 +80,7 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
train_prev = train_prev[pos_class]
ax.scatter(train_prev, train_prev, c='c', label='tr-prev', linewidth=2, edgecolor='k', s=100, zorder=3)
ax.set(xlabel='true prevalence', ylabel='estimated prevalence', title=title)
ax.set(xlabel='true frequency', ylabel='estimated frequency', title=title)
ax.set_ylim(0, 1)
ax.set_xlim(0, 1)
@ -241,6 +241,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
title=None,
vlines=None,
method_order=None,
fontsize=18,
savepath=None):
"""
Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift
@ -276,6 +277,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
:return: returns (fig, ax) matplotlib objects for eventual customisation
"""
plt.rcParams['font.size'] = fontsize
fig, ax = plt.subplots()
ax.grid()
@ -311,7 +314,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
for p,ind in enumerate(range(len(bins))):
selected = inds==ind
if selected.sum() > 0:
xs.append(ind*binwidth-binwidth/2)
xs.append(ind*binwidth)
ys.append(np.mean(method_drifts[selected]))
ystds.append(np.std(method_drifts[selected]))
npoints[p] += len(method_drifts[selected])
@ -320,6 +323,9 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
ys = np.asarray(ys)
ystds = np.asarray(ystds)
# if ys[-1]<ys[-2]:
# ys[-1] = ys[-2]+(abs(ys[-2]-ys[-3]))/2
min_x_method, max_x_method, min_y_method, max_y_method = xs.min(), xs.max(), ys.min(), ys.max()
min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
@ -342,8 +348,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
ax2.spines['right'].set_color('g')
ax2.tick_params(axis='y', colors='g')
ax.set(xlabel=f'Prior shift between training set and test sample',
ylabel=f'{error_name.upper()} (true prev, predicted prev)',
ax.set(xlabel=f'Amount of label shift',
ylabel=f'Absolute error',
title=title)
# box = ax.get_position()
# ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
@ -357,9 +363,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
if show_legend:
fig.legend(loc='center left',
bbox_to_anchor=(1, 0.5),
ncol=1)
ax.legend(loc='center right', bbox_to_anchor=(1.31, 0.5))
# fig.legend(loc='lower center',
# bbox_to_anchor=(1, 0.5),
# ncol=(len(method_names)+1)//2)
_save_or_show(savepath)


@ -111,6 +111,12 @@ setup(
#
packages=find_packages(include=['quapy', 'quapy.*']), # Required
package_data={
# For the 'quapy.method' package, include all files
# in the 'stan' subdirectory that end with .stan
'quapy.method': ['stan/*.stan']
},
python_requires='>=3.8, <4',
install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention', 'ucimlrepo', 'certifi'],
@ -124,7 +130,7 @@ setup(
# Similar to `install_requires` above, these must be valid existing
# projects.
extras_require={ # Optional
'bayes': ['jax', 'jaxlib', 'numpyro'],
'bayes': ['jax', 'jaxlib', 'numpyro', 'pystan'],
'neural': ['torch'],
'tests': ['certifi'],
'docs' : ['sphinx-rtd-theme', 'myst-parser'],