Compare commits
76 Commits

@ -0,0 +1,7 @@

- Add other methods that natively provide uncertainty quantification?
- MPIW (Mean Prediction Interval Width): the average of the interval amplitudes (without aggregating coverage in any way)
- Implement the Interval Score (Winkler Score)
- Analyze results across prior probability shift
- Add Bayesian EM
- Also optimize C and class_weight?
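A hedged sketch for the two interval-metric items above (the function names and signatures are hypothetical, not part of the codebase): MPIW simply averages the interval widths, while the interval (Winkler) score adds, whenever the true value falls outside the interval, a penalty of 2/alpha times the distance to the violated bound.

import numpy as np

def mpiw(lows, highs):
    # Mean Prediction Interval Width: the average amplitude of the intervals
    return float(np.mean(np.asarray(highs) - np.asarray(lows)))

def interval_score(lows, highs, y_true, alpha=0.05):
    # Winkler/interval score of central (1-alpha) intervals [low, high] against the true values
    lows, highs, y_true = map(np.asarray, (lows, highs, y_true))
    width = highs - lows
    below = (2. / alpha) * (lows - y_true) * (y_true < lows)
    above = (2. / alpha) * (y_true - highs) * (y_true > highs)
    return float(np.mean(width + below + above))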
@ -0,0 +1,215 @@
|
||||||
|
|
||||||
|
from sklearn.base import BaseEstimator
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import quapy.util
|
||||||
|
from quapy.method._kdey import KDEBase
|
||||||
|
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
|
||||||
|
from quapy.functional import CLRtransformation, ILRtransformation
|
||||||
|
from quapy.method.aggregative import AggregativeSoftQuantifier
|
||||||
|
from tqdm import tqdm
|
||||||
|
import quapy.functional as F
|
||||||
|
import emcee  # required by _bayesian_emcee (emcee.EnsembleSampler)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
|
"""
|
||||||
|
Bayesian version of KDEy.
|
||||||
|
|
||||||
|
:param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken to be
|
||||||
|
the one indicated in `qp.environ['DEFAULT_CLS']`
|
||||||
|
:param val_split: specifies the data used for generating classifier predictions. This specification
|
||||||
|
can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
|
||||||
|
be extracted from the training set; or as an integer (default 5), indicating that the predictions
|
||||||
|
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
|
||||||
|
for `k`); or as a tuple `(X,y)` defining the specific set of data to use for validation. Set to
|
||||||
|
None when the method does not require any validation data, in order to avoid that some portion of
|
||||||
|
the training data be wasted.
|
||||||
|
:param num_warmup: number of warmup iterations for the MCMC sampler (default 500)
|
||||||
|
:param num_samples: number of samples to draw from the posterior (default 1000)
|
||||||
|
:param mcmc_seed: random seed for the MCMC sampler (default 0)
|
||||||
|
:param confidence_level: float in [0,1] to construct a confidence region around the point estimate (default 0.95)
|
||||||
|
:param region: string, set to `intervals` for constructing confidence intervals (default), or to
|
||||||
|
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
|
||||||
|
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
|
||||||
|
:param verbose: bool, whether to display progress bar
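:param fit_classifier: whether to train the classifier (default True) or to use it as given (assumed already trained)
:param kernel: name of the kernel used by the KDEs (default 'gaussian'; the experiments also use 'aitchison' and 'ilr')
:param bandwidth: bandwidth of the KDEs (default 0.1)
:param explore: the space in which the random-walk proposals are generated; one of 'simplex' (default), 'clr', or 'ilr'
:param step_size: scale of the random-walk proposals (default 0.05)
:param temperature: positive tempering factor that divides the log-likelihood (default 1.)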
|
||||||
|
"""
|
||||||
|
def __init__(self,
|
||||||
|
classifier: BaseEstimator=None,
|
||||||
|
fit_classifier=True,
|
||||||
|
val_split: int = 5,
|
||||||
|
kernel='gaussian',
|
||||||
|
bandwidth=0.1,
|
||||||
|
num_warmup: int = 500,
|
||||||
|
num_samples: int = 1_000,
|
||||||
|
mcmc_seed: int = 0,
|
||||||
|
confidence_level: float = 0.95,
|
||||||
|
region: str = 'intervals',
|
||||||
|
explore='simplex',
|
||||||
|
step_size=0.05,
|
||||||
|
temperature=1.,
|
||||||
|
verbose: bool = False):
|
||||||
|
|
||||||
|
if num_warmup <= 0:
|
||||||
|
raise ValueError(f'parameter {num_warmup=} must be a positive integer')
|
||||||
|
if num_samples <= 0:
|
||||||
|
raise ValueError(f'parameter {num_samples=} must be a positive integer')
|
||||||
|
assert explore in ['simplex', 'clr', 'ilr'], \
|
||||||
|
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
|
||||||
|
assert temperature > 0., f'parameter {temperature=} must be a positive value'
|
||||||
|
|
||||||
|
super().__init__(classifier, fit_classifier, val_split)
|
||||||
|
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
|
||||||
|
self.kernel = self._check_kernel(kernel)
|
||||||
|
self.num_warmup = num_warmup
|
||||||
|
self.num_samples = num_samples
|
||||||
|
self.mcmc_seed = mcmc_seed
|
||||||
|
self.confidence_level = confidence_level
|
||||||
|
self.region = region
|
||||||
|
self.explore = explore
|
||||||
|
self.step_size = step_size
|
||||||
|
self.temperature = temperature
|
||||||
|
self.verbose = verbose
|
||||||
|
|
||||||
|
def aggregation_fit(self, classif_predictions, labels):
|
||||||
|
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def aggregate(self, classif_predictions):
|
||||||
|
# self.prevalence_samples = self._bayesian_kde(classif_predictions, init=None, verbose=self.verbose)
|
||||||
|
self.prevalence_samples = self._bayesian_emcee(classif_predictions)
|
||||||
|
return self.prevalence_samples.mean(axis=0)
|
||||||
|
|
||||||
|
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
|
||||||
|
if confidence_level is None:
|
||||||
|
confidence_level = self.confidence_level
|
||||||
|
classif_predictions = self.classify(instances)
|
||||||
|
point_estimate = self.aggregate(classif_predictions)
|
||||||
|
samples = self.prevalence_samples # available after calling "aggregate" function
|
||||||
|
region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
|
||||||
|
return point_estimate, region
|
||||||
|
|
||||||
|
def _bayesian_kde(self, X_probs, init=None, verbose=False):
|
||||||
|
"""
|
||||||
|
Bayes:
|
||||||
|
P(prev|data) = P(data|prev) P(prev) / P(data)
|
||||||
|
i.e.,
|
||||||
|
posterior = likelihood * prior / evidence
|
||||||
|
we assume the likelihood be:
|
||||||
|
prev @ [kde_i_likelihood(data) 1..i..n]
|
||||||
|
prior be uniform in simplex
|
||||||
|
"""
|
||||||
|
|
||||||
|
rng = np.random.default_rng(self.mcmc_seed)
|
||||||
|
kdes = self.mix_densities
|
||||||
|
test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
|
||||||
|
|
||||||
|
def log_likelihood(prev, epsilon=1e-10):
|
||||||
|
test_likelihoods = prev @ test_densities
|
||||||
|
test_loglikelihood = np.log(test_likelihoods + epsilon)
|
||||||
|
return (1./self.temperature) * np.sum(test_loglikelihood)
|
||||||
|
|
||||||
|
# def log_prior(prev):
|
||||||
|
# todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
|
||||||
|
# return 1/np.sum((prev-init)**2) # it is not 1, but we assume uniform, so anyway it is a useless constant
|
||||||
|
|
||||||
|
# def log_prior(prev, alpha_scale=1000):
|
||||||
|
# alpha = np.array(init) * alpha_scale
|
||||||
|
# return dirichlet.logpdf(prev, alpha)
|
||||||
|
|
||||||
|
def log_prior(prev):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def sample_neighbour(prev, step_size):
|
||||||
|
# random-walk Metropolis-Hastings
|
||||||
|
d = len(prev)
|
||||||
|
neighbour = None
|
||||||
|
if self.explore=='simplex':
|
||||||
|
dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
|
||||||
|
neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
|
||||||
|
elif self.explore=='clr':
|
||||||
|
clr = CLRtransformation()
|
||||||
|
clr_point = clr(prev)
|
||||||
|
dir_noise = rng.normal(scale=step_size, size=d)
|
||||||
|
clr_neighbour = clr_point+dir_noise
|
||||||
|
neighbour = clr.inverse(clr_neighbour)
|
||||||
|
assert in_simplex(neighbour), 'wrong CLR transformation'
|
||||||
|
elif self.explore=='ilr':
|
||||||
|
ilr = ILRtransformation()
|
||||||
|
ilr_point = ilr(prev)
|
||||||
|
dir_noise = rng.normal(scale=step_size, size=d-1)
|
||||||
|
ilr_neighbour = ilr_point + dir_noise
|
||||||
|
neighbour = ilr.inverse(ilr_neighbour)
|
||||||
|
assert in_simplex(neighbour), 'wrong ILR transformation'
|
||||||
|
return neighbour
|
||||||
|
|
||||||
|
n_classes = X_probs.shape[1]
|
||||||
|
current_prev = F.uniform_prevalence(n_classes) if init is None else init
|
||||||
|
current_likelihood = log_likelihood(current_prev) + log_prior(current_prev)
|
||||||
|
|
||||||
|
# Metropolis-Hastings with adaptive rate
|
||||||
|
step_size = self.step_size
|
||||||
|
target_acceptance = 0.3
|
||||||
|
adapt_rate = 0.05
|
||||||
|
acceptance_history = []
|
||||||
|
|
||||||
|
samples = []
|
||||||
|
total_steps = self.num_samples + self.num_warmup
|
||||||
|
for i in tqdm(range(total_steps), total=total_steps, disable=not verbose):
|
||||||
|
proposed_prev = sample_neighbour(current_prev, step_size)
|
||||||
|
|
||||||
|
# probability of acceptance
|
||||||
|
proposed_likelihood = log_likelihood(proposed_prev) + log_prior(proposed_prev)
|
||||||
|
acceptance = proposed_likelihood - current_likelihood
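# Metropolis acceptance in log-space: improvements are always accepted; otherwise the proposal is accepted with probability exp(acceptance)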
|
||||||
|
|
||||||
|
# decide acceptance
|
||||||
|
accepted = np.log(rng.random()) < acceptance
|
||||||
|
if accepted:
|
||||||
|
current_prev = proposed_prev
|
||||||
|
current_likelihood = proposed_likelihood
|
||||||
|
|
||||||
|
samples.append(current_prev)
|
||||||
|
acceptance_history.append(1. if accepted else 0.)
|
||||||
|
|
||||||
|
# if i < self.num_warmup and i%10==0 and len(acceptance_history)>=100:
|
||||||
|
if i % 10 == 0 and len(acceptance_history) >= 100:
|
||||||
|
recent_accept_rate = np.mean(acceptance_history[-100:])
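# adapt the proposal scale towards the target acceptance rate: enlarge the step when accepting too often, shrink it otherwise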
|
||||||
|
step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
|
||||||
|
# step_size = float(np.clip(step_size, min_step, max_step))
|
||||||
|
if self.verbose and i % 100 == 0:
print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
|
||||||
|
|
||||||
|
# remove "warmup" initial iterations
|
||||||
|
samples = np.asarray(samples[self.num_warmup:])
|
||||||
|
return samples
|
||||||
|
|
||||||
|
def _bayesian_emcee(self, X_probs):
|
||||||
|
ndim = X_probs.shape[1]
|
||||||
|
nwalkers = 32
|
||||||
|
|
||||||
|
f = CLRtransformation()
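# the ensemble sampler explores the CLR-transformed space; the samples are mapped back onto the simplex at the end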
|
||||||
|
|
||||||
|
def log_likelihood(unconstrained, test_densities, epsilon=1e-10):
|
||||||
|
prev = f.inverse(unconstrained)
|
||||||
|
test_likelihoods = prev @ test_densities
|
||||||
|
test_loglikelihood = np.log(test_likelihoods + epsilon)
|
||||||
|
return np.sum(test_loglikelihood)
|
||||||
|
|
||||||
|
kdes = self.mix_densities
|
||||||
|
test_densities = np.asarray([self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes])
|
||||||
|
|
||||||
|
# p0 = np.random.normal(nwalkers, ndim)
|
||||||
|
p0 = F.uniform_prevalence_sampling(ndim, nwalkers)
|
||||||
|
p0 = f(p0)
|
||||||
|
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_likelihood, args=[test_densities])
|
||||||
|
|
||||||
|
state = sampler.run_mcmc(p0, self.num_warmup, skip_initial_state_check=True)
|
||||||
|
sampler.reset()
|
||||||
|
sampler.run_mcmc(state, self.num_samples, skip_initial_state_check=True)
|
||||||
|
samples = sampler.get_chain(flat=True)
|
||||||
|
samples = f.inverse(samples)
|
||||||
|
return samples
|
||||||
|
|
||||||
|
def in_simplex(x):
|
||||||
|
return np.all(x >= 0) and np.isclose(x.sum(), 1)
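# Hedged usage sketch (illustration only; the dataset name and hyperparameters are placeholders borrowed from the
# accompanying experiment scripts):
if __name__ == '__main__':
    from sklearn.linear_model import LogisticRegression
    import quapy as qp

    qp.environ['SAMPLE_SIZE'] = 1000
    train, test = qp.datasets.fetch_UCIMulticlassDataset('abalone').train_test
    quantifier = BayesianKDEy(LogisticRegression(), bandwidth=0.1, verbose=True)
    quantifier.fit(*train.Xy)
    point_estimate, region = quantifier.predict_conf(test.X)
    print(f'point estimate: {point_estimate}')
    print(f'true prevalence covered: {region.coverage(test.prevalence()) == 1}')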
|
||||||
|
|
@ -0,0 +1,196 @@
|
||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
from os.path import join
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from sklearn.calibration import CalibratedClassifierCV
|
||||||
|
from sklearn.linear_model import LogisticRegression as LR
|
||||||
|
from sklearn.model_selection import GridSearchCV, StratifiedKFold
|
||||||
|
from copy import deepcopy as cp
|
||||||
|
import quapy as qp
|
||||||
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from quapy.data import LabelledCollection
|
||||||
|
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
|
||||||
|
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
||||||
|
from quapy.model_selection import GridSearchQ
|
||||||
|
from quapy.data import Dataset
|
||||||
|
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
||||||
|
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
|
||||||
|
from quapy.functional import strprev
|
||||||
|
from quapy.method.aggregative import KDEyML, ACC
|
||||||
|
from quapy.protocol import UPP
|
||||||
|
import quapy.functional as F
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
from scipy.stats import dirichlet
|
||||||
|
from collections import defaultdict
|
||||||
|
from time import time
|
||||||
|
from sklearn.base import clone, BaseEstimator
|
||||||
|
|
||||||
|
|
||||||
|
class KDEyCLR(KDEyML):
|
||||||
|
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
|
||||||
|
super().__init__(
|
||||||
|
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
|
||||||
|
random_state=random_state, kernel='aitchison'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KDEyILR(KDEyML):
|
||||||
|
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
|
||||||
|
super().__init__(
|
||||||
|
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
|
||||||
|
random_state=random_state, kernel='ilr'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def methods():
|
||||||
|
"""
|
||||||
|
Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor, method_scope), where:
- name: a str representing the name of the method (e.g., 'BayesianKDEy')
- quantifier: the base model (e.g., KDEyML())
- hyperparams: a dictionary of hyperparameter grids for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
- bayesian/bootstrap_constructor: a function that instantiates the bayesian or bootstrap method with the
quantifier's optimized hyperparameters
- method_scope: one of 'multiclass', 'only_binary', 'only_multiclass', indicating which datasets the method applies to
"""
|
||||||
|
acc_hyper = {}
|
||||||
|
emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
|
||||||
|
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
|
||||||
|
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
|
||||||
|
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
|
||||||
|
|
||||||
|
multiclass_method = 'multiclass'
|
||||||
|
only_binary = 'only_binary'
|
||||||
|
only_multiclass = 'only_multiclass'
|
||||||
|
|
||||||
|
# yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
|
||||||
|
# yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
|
||||||
|
|
||||||
|
yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
|
||||||
|
|
||||||
|
# yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
|
||||||
|
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
|
||||||
|
#
|
||||||
|
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||||
|
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||||
|
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
|
||||||
|
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
|
||||||
|
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
|
||||||
|
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
|
||||||
|
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
|
||||||
|
|
||||||
|
|
||||||
|
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||||
|
with qp.util.temp_seed(0):
|
||||||
|
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
|
||||||
|
# model selection
|
||||||
|
if len(grid)>0:
|
||||||
|
train, val = train.split_stratified(train_prop=0.6, random_state=0)
|
||||||
|
mod_sel = GridSearchQ(
|
||||||
|
model=point_quantifier,
|
||||||
|
param_grid=grid,
|
||||||
|
protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
|
||||||
|
refit=False,
|
||||||
|
n_jobs=-1,
|
||||||
|
verbose=True
|
||||||
|
).fit(*train.Xy)
|
||||||
|
best_params = mod_sel.best_params_
|
||||||
|
else:
|
||||||
|
best_params = {}
|
||||||
|
|
||||||
|
return best_params
|
||||||
|
|
||||||
|
|
||||||
|
def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path):
|
||||||
|
with qp.util.temp_seed(0):
|
||||||
|
|
||||||
|
training, test = dataset.train_test
|
||||||
|
|
||||||
|
# model selection
|
||||||
|
best_hyperparams = qp.util.pickled_resource(
|
||||||
|
hyper_choice_path, model_selection, training, cp(point_quantifier), grid
|
||||||
|
)
|
||||||
|
|
||||||
|
t_init = time()
|
||||||
|
withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
|
||||||
|
tr_time = time() - t_init
|
||||||
|
|
||||||
|
# test
|
||||||
|
train_prevalence = training.prevalence()
|
||||||
|
results = defaultdict(list)
|
||||||
|
test_generator = UPP(test, repeats=100, random_state=0)
|
||||||
|
for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
|
||||||
|
t_init = time()
|
||||||
|
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
|
||||||
|
ttime = time()-t_init
|
||||||
|
results['true-prevs'].append(true_prevalence)
|
||||||
|
results['point-estim'].append(point_estimate)
|
||||||
|
results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
|
||||||
|
results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
|
||||||
|
results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
|
||||||
|
results['coverage'].append(region.coverage(true_prevalence))
|
||||||
|
results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
|
||||||
|
results['test-time'].append(ttime)
|
||||||
|
results['samples'].append(region.samples)
|
||||||
|
|
||||||
|
report = {
|
||||||
|
'optim_hyper': best_hyperparams,
|
||||||
|
'train_time': tr_time,
|
||||||
|
'train-prev': train_prevalence,
|
||||||
|
'results': {k:np.asarray(v) for k,v in results.items()}
|
||||||
|
}
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
|
||||||
|
def experiment_path(dir:Path, dataset_name:str, method_name:str):
|
||||||
|
os.makedirs(dir, exist_ok=True)
|
||||||
|
return dir/f'{dataset_name}__{method_name}.pkl'
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
binary = {
|
||||||
|
'datasets': qp.datasets.UCI_BINARY_DATASETS,
|
||||||
|
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
||||||
|
'sample_size': 500
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass = {
|
||||||
|
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
|
||||||
|
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
||||||
|
'sample_size': 1000
|
||||||
|
}
|
||||||
|
|
||||||
|
result_dir = Path('./results')
|
||||||
|
|
||||||
|
for setup in [binary, multiclass]: # [binary, multiclass]:
|
||||||
|
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
|
||||||
|
for data_name in setup['datasets']:
|
||||||
|
print(f'dataset={data_name}')
|
||||||
|
# if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
|
||||||
|
# print(f'skipping dataset: {data_name}')
|
||||||
|
# continue
|
||||||
|
data = setup['fetch_fn'](data_name)
|
||||||
|
is_binary = data.n_classes==2
|
||||||
|
result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
|
||||||
|
hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
|
||||||
|
for method_name, method, hyper_params, withconf_constructor, method_scope in methods():
|
||||||
|
if method_scope == 'only_binary' and not is_binary:
|
||||||
|
continue
|
||||||
|
if method_scope == 'only_multiclass' and is_binary:
|
||||||
|
continue
|
||||||
|
result_path = experiment_path(result_subdir, data_name, method_name)
|
||||||
|
hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
|
||||||
|
report = qp.util.pickled_resource(
|
||||||
|
result_path, experiment, data, method, method_name, hyper_params, withconf_constructor, hyper_path
|
||||||
|
)
|
||||||
|
print(f'dataset={data_name}, '
|
||||||
|
f'method={method_name}: '
|
||||||
|
f'mae={report["results"]["ae"].mean():.3f}, '
|
||||||
|
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||||
|
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,170 @@
|
||||||
|
import pickle
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from joblib import Parallel, delayed
|
||||||
|
from tqdm import tqdm
|
||||||
|
import pandas as pd
|
||||||
|
from glob import glob
|
||||||
|
from pathlib import Path
|
||||||
|
import quapy as qp
|
||||||
|
from error import dist_aitchison
|
||||||
|
from quapy.method.confidence import ConfidenceIntervals
|
||||||
|
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
|
||||||
|
|
||||||
|
pd.set_option('display.max_columns', None)
|
||||||
|
pd.set_option('display.width', 2000)
|
||||||
|
pd.set_option('display.max_rows', None)
|
||||||
|
pd.set_option("display.expand_frame_repr", False)
|
||||||
|
pd.set_option("display.precision", 4)
|
||||||
|
pd.set_option("display.float_format", "{:.4f}".format)
|
||||||
|
|
||||||
|
|
||||||
|
def region_score(true_prev, region: ConfidenceRegionABC):
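# Region score (descriptive note): the Monte Carlo estimate of the region's amplitude plus, when the true
# prevalence falls outside the region, a penalty proportional to the Aitchison distance to the closest point
# of the region, scaled by 1/alpha (in the spirit of the interval/Winkler score); the Aitchison distance is
# the Euclidean distance between the CLR-transformed prevalence vectors.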
|
||||||
|
amp = region.montecarlo_proportion(50_000)
|
||||||
|
if true_prev in region:
|
||||||
|
cost = 0
|
||||||
|
else:
|
||||||
|
scale_cost = 1/region.alpha
|
||||||
|
cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
|
||||||
|
return amp + cost
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def compute_coverage_amplitude(region_constructor, **kwargs):
|
||||||
|
all_samples = results['samples']
|
||||||
|
all_true_prevs = results['true-prevs']
|
||||||
|
|
||||||
|
def process_one(samples, true_prevs):
|
||||||
|
region = region_constructor(samples, **kwargs)
|
||||||
|
if isinstance(region, ConfidenceIntervals):
|
||||||
|
winkler = region.mean_winkler_score(true_prevs)
|
||||||
|
else:
|
||||||
|
winkler = None
|
||||||
|
return region.coverage(true_prevs), region.montecarlo_proportion(), winkler
|
||||||
|
|
||||||
|
out = Parallel(n_jobs=3)(
|
||||||
|
delayed(process_one)(samples, true_prevs)
|
||||||
|
for samples, true_prevs in tqdm(
|
||||||
|
zip(all_samples, all_true_prevs),
|
||||||
|
total=len(all_samples),
|
||||||
|
desc='constructing ellipses'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# unzip results
|
||||||
|
coverage, amplitude, winkler = zip(*out)
|
||||||
|
return list(coverage), list(amplitude), list(winkler)
|
||||||
|
|
||||||
|
|
||||||
|
def update_pickle(report, pickle_path, updated_dict:dict):
|
||||||
|
for k,v in updated_dict.items():
|
||||||
|
report[k]=v
|
||||||
|
pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
|
|
||||||
|
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
|
||||||
|
if f'coverage-{conf_name}' not in report:
|
||||||
|
covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
|
||||||
|
|
||||||
|
update_fields = {
|
||||||
|
f'coverage-{conf_name}': covs,
|
||||||
|
f'amplitude-{conf_name}': amps,
|
||||||
|
f'winkler-{conf_name}': winkler
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pickle(report, file, update_fields)
|
||||||
|
|
||||||
|
methods = None # show all methods
|
||||||
|
# methods = ['BayesianACC', 'BayesianKDEy']
|
||||||
|
|
||||||
|
for setup in ['multiclass']:
|
||||||
|
path = f'./results/{setup}/*.pkl'
|
||||||
|
table = defaultdict(list)
|
||||||
|
for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
|
||||||
|
file = Path(file)
|
||||||
|
dataset, method = file.name.replace('.pkl', '').split('__')
|
||||||
|
if methods is not None and method not in methods:
|
||||||
|
continue
|
||||||
|
report = pickle.load(open(file, 'rb'))
|
||||||
|
results = report['results']
|
||||||
|
n_samples = len(results['ae'])
|
||||||
|
table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
|
||||||
|
table['dataset'].extend([dataset] * n_samples)
|
||||||
|
table['ae'].extend(results['ae'])
|
||||||
|
table['rae'].extend(results['rae'])
|
||||||
|
# table['c-CI'].extend(results['coverage'])
|
||||||
|
# table['a-CI'].extend(results['amplitude'])
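# recompute each type of confidence region from the stored samples, and cache the resulting coverage and
# amplitude (plus the Winkler score, for intervals) back into the pickled report so later runs can reuse them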
|
||||||
|
|
||||||
|
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
||||||
|
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||||
|
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||||
|
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||||
|
|
||||||
|
table['c-CI'].extend(report['coverage-CI'])
|
||||||
|
table['a-CI'].extend(report['amplitude-CI'])
|
||||||
|
table['w-CI'].extend(report['winkler-CI'])
|
||||||
|
|
||||||
|
table['c-CE'].extend(report['coverage-CE'])
|
||||||
|
table['a-CE'].extend(report['amplitude-CE'])
|
||||||
|
|
||||||
|
table['c-CLR'].extend(report['coverage-CLR'])
|
||||||
|
table['a-CLR'].extend(report['amplitude-CLR'])
|
||||||
|
|
||||||
|
table['c-ILR'].extend(report['coverage-ILR'])
|
||||||
|
table['a-ILR'].extend(report['amplitude-ILR'])
|
||||||
|
|
||||||
|
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
|
||||||
|
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
|
||||||
|
# table['aitch'].extend()
|
||||||
|
table['reg-score-ILR'].extend(
|
||||||
|
[region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
df = pd.DataFrame(table)
|
||||||
|
|
||||||
|
n_classes = {}
|
||||||
|
tr_size = {}
|
||||||
|
for dataset in df['dataset'].unique():
|
||||||
|
fetch_fn = {
|
||||||
|
'binary': qp.datasets.fetch_UCIBinaryDataset,
|
||||||
|
'multiclass': qp.datasets.fetch_UCIMulticlassDataset
|
||||||
|
}[setup]
|
||||||
|
data = fetch_fn(dataset)
|
||||||
|
n_classes[dataset] = data.n_classes
|
||||||
|
tr_size[dataset] = len(data.training)
|
||||||
|
|
||||||
|
# remove datasets with more than max_classes classes
|
||||||
|
# max_classes = 30
|
||||||
|
# min_train = 1000
|
||||||
|
# for data_name, n in n_classes.items():
|
||||||
|
# if n > max_classes:
|
||||||
|
# df = df[df["dataset"] != data_name]
|
||||||
|
# for data_name, n in tr_size.items():
|
||||||
|
# if n < min_train:
|
||||||
|
# df = df[df["dataset"] != data_name]
|
||||||
|
|
||||||
|
for region in ['ILR']: # , 'CI', 'CE', 'CLR', 'ILR']:
|
||||||
|
if setup == 'binary' and region=='ILR':
|
||||||
|
continue
|
||||||
|
# pv = pd.pivot_table(
|
||||||
|
# df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
|
||||||
|
# )
|
||||||
|
pv = pd.pivot_table(
|
||||||
|
df, index='dataset', columns='method', values=[
|
||||||
|
#f'w-{region}',
|
||||||
|
# 'ae',
|
||||||
|
# 'rae',
|
||||||
|
# f'aitch',
|
||||||
|
# f'aitch-well'
|
||||||
|
'reg-score-ILR',
|
||||||
|
], margins=True
|
||||||
|
)
|
||||||
|
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
|
||||||
|
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
|
||||||
|
pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
|
||||||
|
print(f'{setup=}')
|
||||||
|
print(pv)
|
||||||
|
print('-'*80)
|
||||||
|
|
||||||
|
|
@ -0,0 +1,258 @@
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.colors import ListedColormap
|
||||||
|
from scipy.stats import gaussian_kde
|
||||||
|
|
||||||
|
from method.confidence import (ConfidenceIntervals as CI,
|
||||||
|
ConfidenceEllipseSimplex as CE,
|
||||||
|
ConfidenceEllipseCLR as CLR,
|
||||||
|
ConfidenceEllipseILR as ILR)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_region_colormap(name="blue", alpha=0.40):
|
||||||
|
name = name.lower()
|
||||||
|
if name == "blue":
|
||||||
|
base = (76/255, 114/255, 176/255)
|
||||||
|
elif name == "orange":
|
||||||
|
base = (221/255, 132/255, 82/255)
|
||||||
|
elif name == "violet":
|
||||||
|
base = (129/255, 114/255, 178/255)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown palette name: {name}")
|
||||||
|
|
||||||
|
cmap = ListedColormap([
|
||||||
|
(1, 1, 1, 0), # 0: transparent white
|
||||||
|
(base[0], base[1], base[2], alpha) # 1: color
|
||||||
|
])
|
||||||
|
|
||||||
|
return cmap
|
||||||
|
|
||||||
|
|
||||||
|
def plot_prev_points(samples=None,
|
||||||
|
show_samples=True,
|
||||||
|
true_prev=None,
|
||||||
|
point_estim=None, train_prev=None, show_mean=True, show_legend=True,
|
||||||
|
region=None,
|
||||||
|
region_resolution=1000,
|
||||||
|
confine_region_in_simplex=False,
|
||||||
|
color='blue',
|
||||||
|
save_path=None):
|
||||||
|
|
||||||
|
plt.rcParams.update({
|
||||||
|
'font.size': 10,        # base size for all text
'axes.titlesize': 12,   # axes title
'axes.labelsize': 10,   # axes labels
'xtick.labelsize': 8,   # tick labels
'ytick.labelsize': 8,
'legend.fontsize': 9,   # legend
|
||||||
|
})
|
||||||
|
|
||||||
|
def cartesian(p):
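# map barycentric coordinates (p1, p2, p3) to the 2D Cartesian coordinates of the equilateral triangle
# with vertices v1=(0,0), v2=(1,0), v3=(0.5, sqrt(3)/2): x = p2 + p3/2, y = p3*sqrt(3)/2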
|
||||||
|
dim = p.shape[-1]
|
||||||
|
p = p.reshape(-1,dim)
|
||||||
|
x = p[:, 1] + p[:, 2] * 0.5
|
||||||
|
y = p[:, 2] * np.sqrt(3) / 2
|
||||||
|
return x, y
|
||||||
|
|
||||||
|
def barycentric_from_xy(x, y):
|
||||||
|
"""
|
||||||
|
Given Cartesian coordinates (x, y) within the simplex, returns the barycentric coordinates (p1, p2, p3).
|
||||||
|
"""
|
||||||
|
p3 = 2 * y / np.sqrt(3)
|
||||||
|
p2 = x - 0.5 * p3
|
||||||
|
p1 = 1 - p2 - p3
|
||||||
|
return np.stack([p1, p2, p3], axis=-1)
|
||||||
|
|
||||||
|
# simplex coordinates
|
||||||
|
v1 = np.array([0, 0])
|
||||||
|
v2 = np.array([1, 0])
|
||||||
|
v3 = np.array([0.5, np.sqrt(3)/2])
|
||||||
|
|
||||||
|
# Plot
|
||||||
|
fig, ax = plt.subplots(figsize=(6, 6))
|
||||||
|
|
||||||
|
if region is not None:
|
||||||
|
if callable(region):
|
||||||
|
region_list = [("region", region)]
|
||||||
|
else:
|
||||||
|
region_list = region  # list of (name, fn) tuples
|
||||||
|
|
||||||
|
if region is not None:
|
||||||
|
# rectangular mesh
|
||||||
|
x_min, x_max = -0.2, 1.2
|
||||||
|
y_min, y_max = -0.2, np.sqrt(3) / 2 + 0.2
|
||||||
|
|
||||||
|
xs = np.linspace(x_min, x_max, region_resolution)
|
||||||
|
ys = np.linspace(y_min, y_max, region_resolution)
|
||||||
|
grid_x, grid_y = np.meshgrid(xs, ys)
|
||||||
|
|
||||||
|
# barycentric
|
||||||
|
pts_bary = barycentric_from_xy(grid_x, grid_y)
|
||||||
|
|
||||||
|
# mask within simplex
|
||||||
|
if confine_region_in_simplex:
|
||||||
|
in_simplex = np.all(pts_bary >= 0, axis=-1)
|
||||||
|
else:
|
||||||
|
in_simplex = np.full(shape=(region_resolution, region_resolution), fill_value=True, dtype=bool)
|
||||||
|
|
||||||
|
# --- Colormap: 0 -> transparent white, 1 -> semi-transparent region color ---

# iterate over all the regions
|
||||||
|
for (rname, rfun) in region_list:
|
||||||
|
mask = np.zeros_like(in_simplex, dtype=float)
|
||||||
|
valid_pts = pts_bary[in_simplex]
|
||||||
|
mask_vals = np.array([float(rfun(p)) for p in valid_pts])
|
||||||
|
mask[in_simplex] = mask_vals
|
||||||
|
|
||||||
|
ax.pcolormesh(
|
||||||
|
xs, ys, mask,
|
||||||
|
shading='auto',
|
||||||
|
cmap=get_region_colormap(color),
|
||||||
|
alpha=0.3,
|
||||||
|
)
|
||||||
|
|
||||||
|
if samples is not None:
|
||||||
|
if show_samples:
|
||||||
|
ax.scatter(*cartesian(samples), s=15, alpha=0.5, edgecolors='none', label='samples', color='black', linewidth=0.5)
|
||||||
|
if show_mean is not None and show_mean is not False:
|
||||||
|
if isinstance(show_mean, np.ndarray):
|
||||||
|
ax.scatter(*cartesian(show_mean), s=10, alpha=1, label='sample-mean', edgecolors='black')
|
||||||
|
elif show_mean==True and samples is not None:
|
||||||
|
ax.scatter(*cartesian(samples.mean(axis=0)), s=10, alpha=1, label='sample-mean', edgecolors='black')
|
||||||
|
else:
|
||||||
|
raise ValueError(f'show_mean should either be a boolean (if True, then samples must be provided) or '
|
||||||
|
f'the mean point itself')
|
||||||
|
if true_prev is not None:
|
||||||
|
ax.scatter(*cartesian(true_prev), s=10, alpha=1, label='true-prev', edgecolors='black')
|
||||||
|
if point_estim is not None:
|
||||||
|
ax.scatter(*cartesian(point_estim), s=10, alpha=1, label='KDEy-estim', edgecolors='black')
|
||||||
|
if train_prev is not None:
|
||||||
|
ax.scatter(*cartesian(train_prev), s=10, alpha=1, label='train-prev', edgecolors='black')
|
||||||
|
|
||||||
|
# edges
|
||||||
|
triangle = np.array([v1, v2, v3, v1])
|
||||||
|
ax.plot(triangle[:, 0], triangle[:, 1], color='black')
|
||||||
|
|
||||||
|
# vertex labels
|
||||||
|
ax.text(-0.05, -0.05, "Y=1", ha='right', va='top')
|
||||||
|
ax.text(1.05, -0.05, "Y=2", ha='left', va='top')
|
||||||
|
ax.text(0.5, np.sqrt(3)/2 + 0.05, "Y=3", ha='center', va='bottom')
|
||||||
|
|
||||||
|
ax.set_aspect('equal')
|
||||||
|
ax.axis('off')
|
||||||
|
if show_legend:
|
||||||
|
plt.legend(
|
||||||
|
loc='center left',
|
||||||
|
bbox_to_anchor=(1.05, 0.5),
|
||||||
|
)
|
||||||
|
plt.tight_layout()
|
||||||
|
if save_path is None:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
os.makedirs(Path(save_path).parent, exist_ok=True)
|
||||||
|
plt.savefig(save_path)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_prev_points_matplot(points):
|
||||||
|
|
||||||
|
# project 2D
|
||||||
|
v1 = np.array([0, 0])
|
||||||
|
v2 = np.array([1, 0])
|
||||||
|
v3 = np.array([0.5, np.sqrt(3) / 2])
|
||||||
|
x = points[:, 1] + points[:, 2] * 0.5
|
||||||
|
y = points[:, 2] * np.sqrt(3) / 2
|
||||||
|
|
||||||
|
# kde
|
||||||
|
xy = np.vstack([x, y])
|
||||||
|
kde = gaussian_kde(xy, bw_method=0.25)
|
||||||
|
xmin, xmax = 0, 1
|
||||||
|
ymin, ymax = 0, np.sqrt(3) / 2
|
||||||
|
|
||||||
|
# grid
|
||||||
|
xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
|
||||||
|
positions = np.vstack([xx.ravel(), yy.ravel()])
|
||||||
|
zz = np.reshape(kde(positions).T, xx.shape)
|
||||||
|
|
||||||
|
# mask points in simplex
|
||||||
|
def in_triangle(x, y):
|
||||||
|
return (y >= 0) & (y <= np.sqrt(3) * np.minimum(x, 1 - x))
|
||||||
|
|
||||||
|
mask = in_triangle(xx, yy)
|
||||||
|
zz_masked = np.ma.array(zz, mask=~mask)
|
||||||
|
|
||||||
|
# plot
|
||||||
|
fig, ax = plt.subplots(figsize=(6, 6))
|
||||||
|
ax.imshow(
|
||||||
|
np.rot90(zz_masked),
|
||||||
|
cmap=plt.cm.viridis,
|
||||||
|
extent=[xmin, xmax, ymin, ymax],
|
||||||
|
alpha=0.8,
|
||||||
|
)
|
||||||
|
|
||||||
|
# triangle edges
|
||||||
|
triangle = np.array([v1, v2, v3, v1])
|
||||||
|
ax.plot(triangle[:, 0], triangle[:, 1], color='black', lw=2)
|
||||||
|
|
||||||
|
# points (optional)
|
||||||
|
ax.scatter(x, y, s=5, c='white', alpha=0.3)
|
||||||
|
|
||||||
|
# vertex labels
|
||||||
|
ax.text(-0.05, -0.05, "A (1,0,0)", ha='right', va='top')
|
||||||
|
ax.text(1.05, -0.05, "B (0,1,0)", ha='left', va='top')
|
||||||
|
ax.text(0.5, np.sqrt(3) / 2 + 0.05, "C (0,0,1)", ha='center', va='bottom')
|
||||||
|
|
||||||
|
ax.set_aspect('equal')
|
||||||
|
ax.axis('off')
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
np.random.seed(1)
|
||||||
|
|
||||||
|
n = 1000
|
||||||
|
# alpha = [3,5,10]
|
||||||
|
alpha = [10,1,1]
|
||||||
|
prevs = np.random.dirichlet(alpha, size=n)
|
||||||
|
|
||||||
|
def regions():
|
||||||
|
confs = [0.99, 0.95, 0.90]
|
||||||
|
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||||
|
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
|
||||||
|
# resolution = 1000
|
||||||
|
# alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||||
|
# for crname, cr in regions():
|
||||||
|
# plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
|
||||||
|
# color='blue',
|
||||||
|
# save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
def regions():
|
||||||
|
confs = [0.99, 0.95, 0.90]
|
||||||
|
yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||||
|
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||||
|
|
||||||
|
resolution = 1000
|
||||||
|
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||||
|
region = ILR(prevs, confidence_level=.99)
|
||||||
|
p = np.asarray([0.1, 0.8, 0.1])
|
||||||
|
plot_prev_points(prevs, show_samples=False,
|
||||||
|
show_mean=region.mean_,
|
||||||
|
# show_mean=prevs.mean(axis=0),
|
||||||
|
show_legend=False, region=[('', region.coverage)], region_resolution=resolution,
|
||||||
|
color='blue',
|
||||||
|
true_prev=p,
|
||||||
|
train_prev=region.closest_point_in_region(p),
|
||||||
|
save_path=f'./plots3/simplex_ilr.png',
|
||||||
|
)
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
import quapy as qp
|
||||||
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
||||||
|
from method.confidence import ConfidenceIntervals
|
||||||
|
from quapy.functional import strprev
|
||||||
|
from quapy.method.aggregative import KDEyML
|
||||||
|
from quapy.protocol import UPP
|
||||||
|
import quapy.functional as F
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
from scipy.stats import dirichlet
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
qp.environ["SAMPLE_SIZE"] = 500
|
||||||
|
cls = LogisticRegression()
|
||||||
|
bayes_kdey = BayesianKDEy(cls, bandwidth=.3, kernel='aitchison', mcmc_seed=0)
|
||||||
|
|
||||||
|
datasets = qp.datasets.UCI_BINARY_DATASETS
|
||||||
|
train, test = qp.datasets.fetch_UCIBinaryDataset(datasets[0]).train_test
|
||||||
|
|
||||||
|
# train, test = qp.datasets.fetch_UCIMulticlassDataset('academic-success', standardize=True).train_test
|
||||||
|
|
||||||
|
with qp.util.temp_seed(0):
|
||||||
|
print('fitting KDEy')
|
||||||
|
bayes_kdey.fit(*train.Xy)
|
||||||
|
|
||||||
|
shifted = test.sampling(500, *[0.2, 0.8])
|
||||||
|
# shifted = test.sampling(500, *test.prevalence()[::-1])
|
||||||
|
# shifted = test.sampling(500, *F.uniform_prevalence_sampling(train.n_classes))
|
||||||
|
prev_hat = bayes_kdey.predict(shifted.X)
|
||||||
|
mae = qp.error.mae(shifted.prevalence(), prev_hat)
|
||||||
|
print(f'true_prev={strprev(shifted.prevalence())}')
|
||||||
|
print(f'prev_hat={strprev(prev_hat)}, {mae=:.4f}')
|
||||||
|
|
||||||
|
prev_hat, conf_interval = bayes_kdey.predict_conf(shifted.X)
|
||||||
|
|
||||||
|
mae = qp.error.mae(shifted.prevalence(), prev_hat)
|
||||||
|
print(f'mean posterior {strprev(prev_hat)}, {mae=:.4f}')
|
||||||
|
print(f'CI={conf_interval}')
|
||||||
|
print(f'\tcontains true={conf_interval.coverage(true_value=shifted.prevalence())==1}')
|
||||||
|
print(f'\tamplitude={conf_interval.montecarlo_proportion(50_000)*100.:.3f}%')
|
||||||
|
|
||||||
|
if train.n_classes == 3:
|
||||||
|
plot_prev_points(bayes_kdey.prevalence_samples, true_prev=shifted.prevalence(), point_estim=prev_hat, train_prev=train.prevalence())
|
||||||
|
# plot_prev_points_matplot(samples)
|
||||||
|
|
||||||
|
# report = qp.evaluation.evaluation_report(kdey, protocol=UPP(test), verbose=True)
|
||||||
|
# print(report.mean(numeric_only=True))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
from os.path import join
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from sklearn.calibration import CalibratedClassifierCV
|
||||||
|
from sklearn.linear_model import LogisticRegression as LR
|
||||||
|
from sklearn.model_selection import GridSearchCV, StratifiedKFold
|
||||||
|
from copy import deepcopy as cp
|
||||||
|
import quapy as qp
|
||||||
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from BayesianKDEy.full_experiments import experiment, experiment_path, KDEyCLR
|
||||||
|
from quapy.data import LabelledCollection
|
||||||
|
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier
|
||||||
|
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
||||||
|
from quapy.model_selection import GridSearchQ
|
||||||
|
from quapy.data import Dataset
|
||||||
|
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
||||||
|
from quapy.method.confidence import ConfidenceIntervals, BayesianCC, PQ, WithConfidenceABC, AggregativeBootstrap
|
||||||
|
from quapy.functional import strprev
|
||||||
|
from quapy.method.aggregative import KDEyML, ACC
|
||||||
|
from quapy.protocol import UPP
|
||||||
|
import quapy.functional as F
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
from scipy.stats import dirichlet
|
||||||
|
from collections import defaultdict
|
||||||
|
from time import time
|
||||||
|
from sklearn.base import clone, BaseEstimator
|
||||||
|
|
||||||
|
|
||||||
|
def methods():
|
||||||
|
"""
|
||||||
|
Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
|
||||||
|
- name: is a str representing the name of the method (e.g., 'BayesianKDEy')
|
||||||
|
- quantifier: is the base model (e.g., KDEyML())
|
||||||
|
- hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
|
||||||
|
- bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the
|
||||||
|
quantifier with optimized hyperparameters
|
||||||
|
"""
|
||||||
|
acc_hyper = {}
|
||||||
|
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
|
||||||
|
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
|
||||||
|
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
|
||||||
|
|
||||||
|
wrap_hyper = lambda dic: {f'quantifier__{k}':v for k,v in dic.items()}
|
||||||
|
|
||||||
|
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
|
||||||
|
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
|
||||||
|
# for T in [10, 20, 50, 100., 500]:
|
||||||
|
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
|
||||||
|
|
||||||
|
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
binary = {
|
||||||
|
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
||||||
|
'sample_size': 500
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass = {
|
||||||
|
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
||||||
|
'sample_size': 1000
|
||||||
|
}
|
||||||
|
|
||||||
|
setup = multiclass
|
||||||
|
# data_name = 'isolet'
|
||||||
|
# data_name = 'cmc'
|
||||||
|
data_name = 'abalone'
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
|
||||||
|
print(f'dataset={data_name}')
|
||||||
|
data = setup['fetch_fn'](data_name)
|
||||||
|
is_binary = data.n_classes==2
|
||||||
|
hyper_subdir = Path('./results') / 'hyperparams' / ('binary' if is_binary else 'multiclass')
|
||||||
|
for method_name, method, hyper_params, withconf_constructor in methods():
|
||||||
|
hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
|
||||||
|
report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
|
||||||
|
|
||||||
|
print(f'dataset={data_name}, '
|
||||||
|
f'method={method_name}: '
|
||||||
|
f'mae={report["results"]["ae"].mean():.3f}, '
|
||||||
|
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||||
|
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,7 +1,16 @@
|
||||||
Change Log 0.2.0
|
Change Log 0.2.1
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
CLEAN TODO-FILE
|
- Added squared ratio error.
|
||||||
|
- Improved efficiency of confidence regions coverage functions
|
||||||
|
- Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy)
|
||||||
|
- Improved documentation of confidence regions.
|
||||||
|
- Added ReadMe method by Daniel Hopkins and Gary King
|
||||||
|
- Internal index in LabelledCollection is now "lazy", and is only constructed if required.
|
||||||
|
- Added dist_aitchison and mean_dist_aitchison as new error evaluation metrics
|
||||||
|
|
||||||
|
Change Log 0.2.0
|
||||||
|
-----------------
|
||||||
|
|
||||||
- Base code Refactor:
|
- Base code Refactor:
|
||||||
- Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
|
- Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,300 @@
|
||||||
|
import os
|
||||||
|
from collections import defaultdict
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import kagglehub
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
import numpy as np
|
||||||
|
from qunfold import KMM
|
||||||
|
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||||
|
from sklearn.decomposition import TruncatedSVD
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.linear_model import LogisticRegression as LR, LogisticRegressionCV
|
||||||
|
from tqdm import tqdm
|
||||||
|
import quapy as qp
|
||||||
|
from data import LabelledCollection, Dataset
|
||||||
|
import quapy.functional as F
|
||||||
|
from method.composable import QUnfoldWrapper
|
||||||
|
from quapy.method.aggregative import DistributionMatchingY, EMQ, KDEyML
|
||||||
|
from quapy.method.non_aggregative import DistributionMatchingX
|
||||||
|
from quapy.method.aggregative import CC, ACC, HDy
|
||||||
|
from transformers import pipeline
|
||||||
|
|
||||||
|
|
||||||
|
pd.set_option('display.max_columns', None)
|
||||||
|
pd.set_option('display.width', 2000)
|
||||||
|
pd.set_option('display.max_rows', None)
|
||||||
|
pd.set_option("display.expand_frame_repr", False)
|
||||||
|
pd.set_option("display.precision", 4)
|
||||||
|
pd.set_option("display.float_format", "{:.4f}".format)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_xy_date_blocks(df, freq="M"):
|
||||||
|
"""
|
||||||
|
df: DataFrame with columns 'text', 'airline_sentiment', 'tweet_created'
freq: frequency of the temporal blocks ('D', 'W', 'M', etc.)

Returns:
X: list of texts
y: np.ndarray of labels
date: array of integer indices, one per tweet, identifying its temporal block
idx2date: list with the temporal limits (start, end) of each block, as tuples
|
||||||
|
"""
|
||||||
|
|
||||||
|
df["tweet_created"] = pd.to_datetime(df["tweet_created"], errors="coerce")
|
||||||
|
df = df.sort_values("tweet_created").reset_index(drop=True)
|
||||||
|
|
||||||
|
X = df["text"].astype(str).values
|
||||||
|
y = df["airline_sentiment"].values
|
||||||
|
|
||||||
|
# group dates by requested frequency
|
||||||
|
date_groups = df["tweet_created"].dt.to_period(freq)
|
||||||
|
|
||||||
|
# assigns index to date blocks
|
||||||
|
unique_periods = date_groups.unique()
|
||||||
|
period_to_idx = {p: i for i, p in enumerate(unique_periods)}
|
||||||
|
|
||||||
|
date = np.asarray([period_to_idx[p] for p in date_groups])
|
||||||
|
|
||||||
|
# get true limits of period intervals
|
||||||
|
idx2date = []
|
||||||
|
for p in unique_periods:
|
||||||
|
start = p.start_time
|
||||||
|
end = p.end_time
|
||||||
|
idx2date.append((start, end))
|
||||||
|
|
||||||
|
return X, y, date, idx2date
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_labelled_collections():
|
||||||
|
# loads and prepares the Twitter US Airlines Sentiment dataset (from Kaggle)
|
||||||
|
# returns a labelled collection for the training data (day 0 and 1), and a list of the
|
||||||
|
# test sets (days 2 to 8) and the time limits for each test period
|
||||||
|
# The dataset is originally ternary (negative, neutral, positive), but we binarize it discarding neutral
|
||||||
|
|
||||||
|
# Download latest version
|
||||||
|
path = kagglehub.dataset_download("crowdflower/twitter-airline-sentiment")
|
||||||
|
df = pd.read_csv(Path(path) / 'Tweets.csv')
|
||||||
|
X, y, date, idx2date = prepare_xy_date_blocks(df, freq="D")
|
||||||
|
|
||||||
|
# binarize
|
||||||
|
|
||||||
|
keep_idx = (y!='neutral')
|
||||||
|
X = X[keep_idx]
|
||||||
|
y = y[keep_idx]
|
||||||
|
date = date[keep_idx]
|
||||||
|
y[y != 'negative'] = 1
|
||||||
|
y[y == 'negative'] = 0
|
||||||
|
y = y.astype(int)
|
||||||
|
|
||||||
|
# use days 0 and 1 for training, the rest for test
|
||||||
|
X_train, y_train = X[date<=1], y[date<=1]
|
||||||
|
train = LabelledCollection(X_train, y_train)
|
||||||
|
print(f'training has {len(train)} docs and prevalence={F.strprev(train.prevalence())} classes={train.classes}')
|
||||||
|
|
||||||
|
tests = []
|
||||||
|
test_init = []
|
||||||
|
for date_i in range(2, max(date)+1):
|
||||||
|
X_test_i, y_test_i = X[date==date_i], y[date==date_i]
|
||||||
|
test_i = LabelledCollection(X_test_i, y_test_i, classes=train.classes)
|
||||||
|
print(f'test-{date_i} has {len(test_i)} docs and prevalence={F.strprev(test_i.prevalence())}')
|
||||||
|
tests.append(test_i)
|
||||||
|
test_init.append(idx2date[date_i])
|
||||||
|
|
||||||
|
return train, tests, test_init
|
||||||
|
|
||||||
|
|
||||||
|
from scipy.interpolate import CubicSpline
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
def smooth_curve(dates, values, num_points=300):
|
||||||
|
"""
|
||||||
|
dates: list of timestamps
|
||||||
|
values: list of Y-values
|
||||||
|
num_points: number of points in the smooth curve
|
||||||
|
|
||||||
|
Returns new_x, new_y for plotting a smooth line.
|
||||||
|
"""
|
||||||
|
# Convert datetime to numeric (matplotlib float representation)
|
||||||
|
x = [d.timestamp() for d in dates]
|
||||||
|
x = np.array(x)
|
||||||
|
y = np.array(values)
|
||||||
|
|
||||||
|
# Create new X-axis with more points
|
||||||
|
x_new = np.linspace(x.min(), x.max(), num_points)
|
||||||
|
|
||||||
|
# Smooth spline
|
||||||
|
spline = CubicSpline(x, y)
|
||||||
|
y_new = spline(x_new)
|
||||||
|
|
||||||
|
# Convert numeric x_new back to datetime
|
||||||
|
dates_new = [pd.to_datetime(t, unit='s') for t in x_new]
|
||||||
|
|
||||||
|
return dates_new, y_new
|
||||||
|
|
||||||
|
|
||||||
|
def plot_prevalences(results_dict, target_class=1, target_label='positive', savepath=None):
|
||||||
|
"""
|
||||||
|
Plot prevalence estimates over time for each method contained in results_dict.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
results_dict : dict
|
||||||
|
A dictionary where:
|
||||||
|
- "date-start" : list of datetime-like objects
|
||||||
|
- all other keys : list of prevalence vectors (arrays), e.g. [p_pos, p_neg]
|
||||||
|
Only the first component (p_pos) will be plotted.
|
||||||
|
"""
|
||||||
|
dates = results_dict["date-start"]
|
||||||
|
|
||||||
|
# Create figure
|
||||||
|
plt.figure(figsize=(20, 10))
|
||||||
|
|
||||||
|
# Plot one line per method (except "date-start")
|
||||||
|
for method, values in results_dict.items():
|
||||||
|
if method == "date-start":
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Extract first component from each prevalence array
|
||||||
|
target_component = [v[target_class]*100 for v in values]
|
||||||
|
|
||||||
|
dates_smooth, y_smooth = smooth_curve(dates, target_component)
|
||||||
|
|
||||||
|
if method=='true-prev':
|
||||||
|
line,=plt.plot(dates_smooth, y_smooth, label=method, linewidth=3, linestyle='-')
|
||||||
|
else:
|
||||||
|
line,=plt.plot(dates_smooth, y_smooth, label=method, linewidth=2, linestyle='--')
|
||||||
|
plt.plot(dates, target_component, 'o', markersize=10, color=line.get_color())
|
||||||
|
|
||||||
|
# Axis labels
|
||||||
|
# plt.xlabel("Date")
|
||||||
|
plt.ylabel("% of "+target_label+" tweets")
|
||||||
|
|
||||||
|
# Rotate date labels for readability
|
||||||
|
plt.xticks(rotation=45)
|
||||||
|
|
||||||
|
plt.minorticks_on()
|
||||||
|
plt.grid(which='major', linestyle='-', linewidth=0.5)
|
||||||
|
plt.grid(which='minor', linestyle=':', linewidth=0.3)
|
||||||
|
|
||||||
|
# Place the legend outside to the right
|
||||||
|
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||||
|
|
||||||
|
plt.tight_layout()
|
||||||
|
if savepath is not None:
|
||||||
|
os.makedirs(Path(savepath).parent, exist_ok=True)
|
||||||
|
plt.savefig(savepath)
|
||||||
|
else:
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
class HDxDensify(DistributionMatchingX):
    def fit(self, X, y):
        self.reductor = TruncatedSVD(n_components=5, random_state=0)
        Xred = self.reductor.fit_transform(X)
        return super().fit(Xred, y)

    def predict(self, X):
        Xred = self.reductor.transform(X)
        return super().predict(Xred)


class QUnfoldWrapperDensify(QUnfoldWrapper):
    def fit(self, X, y):
        self.reductor = TruncatedSVD(n_components=5, random_state=0)
        Xred = self.reductor.fit_transform(X)
        return super().fit(Xred, y)

    def predict(self, X):
        Xred = self.reductor.transform(X)
        return super().predict(Xred)


# A scikit-learn-style wrapper around a HuggingFace pre-trained transformer for binary sentiment classification
class HFTextClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, model_name='distilbert-base-uncased-finetuned-sst-2-english'):
        self.pipe = pipeline("sentiment-analysis", model=model_name)
        self.classes_ = np.asarray([0, 1])

    def fit(self, X, y=None):
        # the model is already pre-trained; nothing to fit
        return self

    def _binary_decisions(self, transformer_output: List[Dict]):
        return np.array([(1 if p['label'] == 'POSITIVE' else 0) for p in transformer_output], dtype=int)

    def predict(self, X):
        X = list(map(str, X))
        preds = self.pipe(X, truncation=True)
        return self._binary_decisions(preds)

    def predict_proba(self, X):
        X = list(map(str, X))
        n_examples = len(X)
        preds = self.pipe(X, truncation=True)
        decisions = self._binary_decisions(preds)
        scores = np.array([p['score'] for p in preds], dtype=float)
        probas = np.zeros(shape=(n_examples, 2), dtype=float)
        # the pipeline score refers to the predicted label; the complementary class gets 1-score
        probas[np.arange(n_examples), decisions] = scores
        probas[np.arange(n_examples), 1 - decisions] = 1 - scores
        return probas

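# Illustrative sanity check of the wrapper's scikit-learn contract (the two example sentences are made up,
# and running it requires the transformers pipeline to be available):
# clf = HFTextClassifier()
# probs = clf.predict_proba(['I loved this movie', 'terrible acting'])
# assert probs.shape == (2, 2) and np.allclose(probs.sum(axis=1), 1)
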
# def methods(pre_trained_classifier):
#     yield 'CC', CC(pre_trained_classifier, fit_classifier=False)

USE_LOGISTIC_REGRESSION = True

if USE_LOGISTIC_REGRESSION:
    new_classifier = lambda: LR()
    to_fit = True
else:
    pretrained = HFTextClassifier()
    new_classifier = lambda: pretrained
    to_fit = False


def methods():
    yield 'CC', CC(new_classifier(), fit_classifier=to_fit)
    yield 'ACC', ACC(new_classifier(), fit_classifier=to_fit)
    yield 'HDy', DistributionMatchingY(new_classifier(), fit_classifier=to_fit)
    yield 'HDx', HDxDensify()
    yield 'KMM', QUnfoldWrapperDensify(KMM())
    yield 'SLD', EMQ(new_classifier(), fit_classifier=to_fit)
    yield 'KDEy', KDEyML(new_classifier(), fit_classifier=to_fit)


train, tests, test_init = prepare_labelled_collections()

if USE_LOGISTIC_REGRESSION:
    # vectorize text for logistic regression
    vectorizer = TfidfVectorizer(min_df=5, sublinear_tf=True)
    Xtr = vectorizer.fit_transform(train.X)
    train = LabelledCollection(Xtr, train.labels, train.classes_)
    for i in range(len(tests)):
        Xte = vectorizer.transform(tests[i].X)
        tests[i] = LabelledCollection(Xte, tests[i].labels, train.classes_)


results = defaultdict(list)
for test_i, test_init_i in zip(tests, test_init):
    results['true-prev'].append(test_i.prevalence())
    results['date-start'].append(test_init_i[0])

for q_name, quant in methods():
    quant.fit(*train.Xy)
    for test_i, test_init_i in tqdm(zip(tests, test_init), desc=f'{q_name} predicting', total=len(tests)):
        pred_i = quant.predict(test_i.X)
        results[q_name].append(pred_i)

suffix = '_lr' if USE_LOGISTIC_REGRESSION else '_transformer'
plot_prevalences(results, savepath=f'./plots_ieee/over_time{suffix}.pdf')

@ -0,0 +1,73 @@
import itertools

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


palette = itertools.cycle(sns.color_palette())

def setframe():
    fig.spines['top'].set_visible(False)
    fig.spines['left'].set_visible(False)
    fig.get_yaxis().set_ticks([])
    fig.spines['right'].set_visible(False)
    # fig.axis('off')

nbins = 50
figsize = (5, 2)
ymax = 0.2

negatives = np.random.normal(loc=0.3, scale=0.2, size=20000)
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])

plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=negatives, binrange=(0, 1), bins=nbins, stat='probability', color=next(palette))
plt.title('Negative distribution')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
# fig.get_figure().savefig('plots_cacm/negatives.pdf')
# plt.clf()

# -------------------------------------------------------------

positives1 = np.random.normal(loc=0.75, scale=0.06, size=20000)
positives2 = np.random.normal(loc=0.65, scale=0.1, size=1)
positives = np.concatenate([positives1, positives2])
np.random.shuffle(positives)
positives = np.asarray([x for x in positives if 0 <= x <= 1])

# plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=positives, binrange=(0, 1), bins=nbins, stat='probability', color=next(palette))
plt.title('')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
fig.get_figure().savefig('plots_cacm/training.pdf')

# -------------------------------------------------------------

prev = 0.2
test = np.concatenate([
    negatives[:int(len(negatives)*(1-prev))],
    positives[:int(len(positives)*prev)],
])

plt.figure(figsize=figsize)
plt.xlim(0, 1)
plt.ylim(0, ymax)
fig = sns.histplot(data=test, binrange=(0, 1), bins=nbins, stat='probability', color=next(palette))
plt.title('')
fig.set(yticklabels=[])
fig.set(ylabel=None)
setframe()
fig.get_figure().savefig('plots_cacm/test.pdf')

@ -0,0 +1,86 @@
from copy import deepcopy

import numpy as np
from sklearn.linear_model import LogisticRegression

import quapy as qp
from method.non_aggregative import DMx
from protocol import APP
from quapy.method.aggregative import CC, ACC, DMy
from sklearn.svm import LinearSVC

qp.environ['SAMPLE_SIZE'] = 100
DATASETS = qp.datasets.UCI_DATASETS[10:]

def fit_eval_task(args):
    model_name, model, train, test = args
    with qp.util.temp_seed(0):
        model = deepcopy(model)
        model.fit(train)
        true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
    return model_name, true_prev, estim_prev


def gen_data():

    def base_classifier():
        return LogisticRegression()
        # return LinearSVC(class_weight='balanced')

    def models():
        yield 'CC', CC(base_classifier())
        yield 'ACC', ACC(base_classifier())
        yield 'HDy', DMy(base_classifier(), val_split=10, nbins=10, n_jobs=-1)
        yield 'HDx', DMx(nbins=10, n_jobs=-1)

    # train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10).train_test
    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []

    for dataset_name in DATASETS:
        train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test
        print(dataset_name, train.X.shape)

        outs = qp.util.parallel(
            fit_eval_task,
            ((method_name, model, train, test) for method_name, model in models()),
            seed=0,
            n_jobs=-1
        )

        for method_name, true_prev, estim_prev in outs:
            method_names.append(method_name)
            true_prevs.append(true_prev)
            estim_prevs.append(estim_prev)
            tr_prevs.append(train.prevalence())

    return method_names, true_prevs, estim_prevs, tr_prevs

method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('../quick_experiment/pickled_plot_data.pkl', gen_data)

def remove_dataset(dataset_order, num_methods=4):
    sel_names, sel_true, sel_estim, sel_tr = [], [], [], []
    for i, (name, true, estim, tr) in enumerate(zip(method_names, true_prevs, estim_prevs, tr_prevs)):
        dataset_pos = i // num_methods
        if dataset_pos not in dataset_order:
            sel_names.append(name)
            sel_true.append(true)
            sel_estim.append(estim)
            sel_tr.append(tr)
    return np.asarray(sel_names), np.asarray(sel_true), np.asarray(sel_estim), np.asarray(sel_tr)

print(DATASETS)
selected = 10
for i in [selected]:
    print(i, DATASETS[i])
    all_ = set(range(len(DATASETS)))
    remove_index = sorted(all_ - {i})
    sel_names, sel_true, sel_estim, sel_tr = remove_dataset(dataset_order=remove_index, num_methods=4)

    p = sel_tr[0][1]
    sel_names = ['CC$_{' + str(p) + '}$' if x == 'CC' else x for x in sel_names]

    # qp.plot.binary_diagonal(sel_names, sel_true, sel_estim, train_prev=sel_tr[0], show_std=False, savepath=f'./plots/bin_diag_{i}.png')
    qp.plot.error_by_drift(sel_names, sel_true, sel_estim, sel_tr, n_bins=10, savepath=f'./plots/err_drift_{i}.png', show_std=True, show_density=False, title="")
    # qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
    # qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=3, savepath='./plots/bin_bias_bin.png')

@ -0,0 +1,62 @@

import math
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt

from data import LabelledCollection

scale = 100


import quapy as qp

negatives = np.random.normal(loc=0.2, scale=0.2, size=20000)
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])

positives = np.random.normal(loc=0.75, scale=0.05, size=20000)
positives = np.asarray([x for x in positives if 0 <= x <= 1])

prev = 0.1
test = np.concatenate([
    negatives[:int(len(negatives)*(1-prev))],
    positives[:int(len(positives)*prev)],
])

nbins = 30

plt.rcParams.update({'font.size': 7})

fig = plt.figure()
positions = np.asarray([2, 1, 0])
colors = ['r', 'g', 'b']

ax = fig.add_subplot(111, projection='3d')
ax.set_box_aspect((3, 1, 0.8))

for post, c, z in zip([test, positives, negatives], colors, positions):

    hist, bins = np.histogram(post, bins=np.linspace(0, 1, nbins+1), density=True)
    xs = (bins[:-1] + bins[1:]) / 2

    ax.bar(xs, hist, width=1/nbins, zs=z, zdir='y', color=c, ec=c, alpha=0.6)

ax.yaxis.set_ticks(positions)
ax.yaxis.set_ticklabels([' '*20+'Test distribution', ' '*20+'Positive distribution', ' '*20+'Negative distribution'])
# ax.xaxis.set_ticks([])
# ax.xaxis.set_ticklabels([], minor=True)
ax.zaxis.set_ticks([])
ax.zaxis.set_ticklabels([], minor=True)

# plt.figure(figsize=(10,6))
# plt.show()
plt.savefig('./histograms3d_CACM2023.pdf')

@ -0,0 +1,63 @@
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import GridSearchCV

import quapy as qp
from data import LabelledCollection
from method.non_aggregative import DMx
from protocol import APP
from quapy.method.aggregative import CC, DMy, ACC, EMQ
from sklearn.svm import LinearSVC
import numpy as np
from tqdm import tqdm

qp.environ['SAMPLE_SIZE'] = 500

def cls():
    return LogisticRegressionCV(n_jobs=-1, Cs=10)
    # return LogisticRegression(C=.1)

def gen_methods():
    yield CC(cls()), r'CC$_{10' + r'\%}$'
    yield ACC(cls()), 'ACC'
    yield DMy(cls(), val_split=10, nbins=10, n_jobs=-1), 'HDy'
    yield DMx(nbins=10, n_jobs=-1), 'HDx'
    yield EMQ(cls()), 'SLD'
    # yield EMQ(cls(), calib='vs'), 'SLD-VS'

def gen_data():

    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

    method_data = []
    training_prevalence = 0.1
    training_size = 5000
    # since the problem is binary, it suffices to specify the negative prevalence (the positive one is constrained)
    train_sample = train.sampling(training_size, 1-training_prevalence, random_state=0)
    # train_sample = train

    for model, method_name in tqdm(gen_methods(), total=5):
        with qp.util.temp_seed(1):
            if method_name == 'HDx':
                X, y = train_sample.Xy
                svd = TruncatedSVD(n_components=5, random_state=0)
                Xred = svd.fit_transform(X)
                train_sample_dense = LabelledCollection(Xred, y)

                X, y = test.Xy
                test_dense = LabelledCollection(svd.transform(X), y)

                model.fit(*train_sample_dense.Xy)
                true_prev, estim_prev = qp.evaluation.prediction(model, APP(test_dense, repeats=100, random_state=0))
            else:
                model.fit(*train_sample.Xy)
                true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
            method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))

    return zip(*method_data)


method_names, true_prevs, estim_prevs, tr_prevs = gen_data()

# qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_ieee/bin_diag_4methods.pdf')
qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_ieee/err_drift_4methods.pdf', title='', show_density=False, show_std=True)

@ -0,0 +1,40 @@
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import GridSearchCV

import quapy as qp
from protocol import APP
from quapy.method.aggregative import CC
from sklearn.svm import LinearSVC
import numpy as np
from tqdm import tqdm

qp.environ['SAMPLE_SIZE'] = 500

def gen_data():

    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

    method_data = []
    for training_prevalence in tqdm(np.linspace(0.1, 0.9, 9), total=9):
        training_size = 5000
        # since the problem is binary, it suffices to specify the negative prevalence (the positive one is constrained)
        train_sample = train.sampling(training_size, 1-training_prevalence)

        # cls = GridSearchCV(LinearSVC(), param_grid={'C': np.logspace(-2,2,5), 'class_weight':[None, 'balanced']}, n_jobs=-1)
        # cls = GridSearchCV(LogisticRegression(), param_grid={'C': np.logspace(-2, 2, 5), 'class_weight': [None, 'balanced']}, n_jobs=-1)
        # cls.fit(*train_sample.Xy)

        model = CC(LogisticRegressionCV(n_jobs=-1, Cs=10))

        model.fit(train_sample)
        true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
        method_name = 'CC$_{' + f'{int(100*training_prevalence)}' + r'\%}$'
        method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))

    return zip(*method_data)


method_names, true_prevs, estim_prevs, tr_prevs = gen_data()

qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_cacm/bin_diag_cc.pdf')
# qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_cacm/err_drift_cc.pdf', title='', show_density=False)

@ -0,0 +1,126 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde


def plot_kde_background(ax, data, cmap="Blues", alpha=0.35, gridsize=200):
    """
    data: array Nx2
    """
    # KDE
    kde = gaussian_kde(data.T)

    # Grid for evaluation
    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1

    X, Y = np.meshgrid(
        np.linspace(x_min, x_max, gridsize),
        np.linspace(y_min, y_max, gridsize)
    )
    Z = kde(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)

    # Draw background density
    ax.contourf(X, Y, Z, levels=30, cmap=cmap, alpha=alpha)


# ======================================================
# Define 3 Gaussian sources in 2D
# ======================================================

# Means
mu1 = np.array([0, 0])  # negative
mu2 = np.array([3, 0])  # positive
mu3 = np.array([0, 3])  # positive

# Covariances
Sigma = np.array([[1, 0.2],
                  [0.2, 1]])


def sample_gaussian(mu, Sigma, n):
    return np.random.multivariate_normal(mu, Sigma, n)


# ======================================================
# Generate datasets for the 4 scenarios
# ======================================================

density = 20

# ---------- Scenario 1: Baseline ----------
G1_1 = sample_gaussian(mu1, Sigma, 100*density)
G2_1 = sample_gaussian(mu2, Sigma, 100*density)
G3_1 = sample_gaussian(mu3, Sigma, 100*density)

# ---------- Scenario 2: Prior Probability Shift ----------
G1_2 = sample_gaussian(mu1, Sigma, 300*density)
G2_2 = sample_gaussian(mu2, Sigma, 50*density)
G3_2 = sample_gaussian(mu3, Sigma, 50*density)

# ---------- Scenario 3: Covariate Shift ----------
# same class proportions but G3 moves (X-shift)
mu3_shift = mu3 + np.array([1.5, 0])
G1_3 = sample_gaussian(mu1, Sigma, 100*density)
G2_3 = sample_gaussian(mu2, Sigma, 100*density)
G3_3 = sample_gaussian(mu3_shift, Sigma, 100*density)  # shifted covariates

# ---------- Scenario 4: Concept Shift ----------
# same data as Scenario 1, but G3 becomes negative
G1_4 = G1_1
G2_4 = G2_1
G3_4 = G3_1  # but will be colored as negative


# ======================================================
# Plotting function for each scenario
# ======================================================

def plot_scenario(ax, G1, G2, G3, title, G3_negative=False):
    # plot_kde_background(ax, G1, cmap="Reds", alpha=0.75)
    # plot_kde_background(ax, G2, cmap="Blues", alpha=0.75)
    # plot_kde_background(ax, G3, cmap="Greens", alpha=0.75)

    ax.scatter(G1[:, 0], G1[:, 1], s=12, color='red', alpha=0.1, label='Negative ($\ominus$)')
    ax.scatter(G2[:, 0], G2[:, 1], s=12, color='blue', alpha=0.1, label='Positive ($\oplus$)')

    if G3_negative:
        ax.scatter(G3[:, 0], G3[:, 1], s=12, color='red', alpha=0.1)  # , label='Negative ($\ominus$)'
    else:
        ax.scatter(G3[:, 0], G3[:, 1], s=12, color='blue', alpha=0.1)  # , label='Positive ($\oplus$)'

    ax.set_title(title)
    ax.set_xlabel("$x_1$")
    ax.set_ylabel("$x_2$")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(alpha=0.3)


# ======================================================
# Generate 2×2 grid of subplots
# ======================================================

fig, axes = plt.subplots(2, 2, figsize=(9, 9))

plot_scenario(axes[0, 0], G1_1, G2_1, G3_1,
              "Training data")

plot_scenario(axes[0, 1], G1_2, G2_2, G3_2,
              "Prior Probability Shift")

plot_scenario(axes[1, 0], G1_3, G2_3, G3_3,
              "Covariate Shift",
              G3_negative=False)

plot_scenario(axes[1, 1], G1_4, G2_4, G3_4,
              "Concept Shift",
              G3_negative=True)

# One global legend
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', ncol=3, fontsize=12)

plt.tight_layout(rect=[0, 0, 1, 0.95])
# plt.show()
plt.savefig('dataset_shift_types.pdf')

@ -0,0 +1,97 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from quapy.functional import uniform_prevalence_sampling

# Vertices of a regular tetrahedron
v1 = np.array([0, 0, 0])
v2 = np.array([1, 0, 0])
v3 = np.array([0.5, np.sqrt(3)/2, 0])
v4 = np.array([0.5, np.sqrt(3)/6, np.sqrt(6)/3])

vertices = np.array([v1, v2, v3, v4])

# Function to map (p1,p2,p3,p4) to 3D coordinates
def prob_to_xyz(p):
    return p[0]*v1 + p[1]*v2 + p[2]*v3 + p[3]*v4

# --- Example: random distributions drawn uniformly from the simplex
rand_probs = uniform_prevalence_sampling(n_classes=4, size=3000)
points_xyz = np.array([prob_to_xyz(p) for p in rand_probs])

# --- Plotting
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# Draw tetrahedron faces
faces = [
    [v1, v2, v3],
    [v1, v2, v4],
    [v1, v3, v4],
    [v2, v3, v4]
]

poly = Poly3DCollection(faces, alpha=0.15, edgecolor='k', facecolor=None)
ax.add_collection3d(poly)

edges = [
    [v1, v2],
    [v1, v3],
    [v1, v4],
    [v2, v3],
    [v2, v4],
    [v3, v4]
]

for edge in edges:
    xs, ys, zs = zip(*edge)
    ax.plot(xs, ys, zs, color='black', linewidth=1)

# Draw vertices
# ax.scatter(vertices[:,0], vertices[:,1], vertices[:,2], s=60, color='red')

# Labels
offset = 0.08
labels = ["$y_1$", "$y_2$", "$y_3$", "$y_4$"]
for i, v in enumerate(vertices):
    norm = np.linalg.norm(v)
    direction = v / norm if norm > 0 else np.zeros_like(v)  # avoid division by zero at the origin vertex
    label_pos = v + offset * direction
    ax.text(label_pos[0], label_pos[1], label_pos[2], labels[i], fontsize=14, color='black')

# Plot random points
ax.scatter(points_xyz[:, 0], points_xyz[:, 1], points_xyz[:, 2], s=10, c='blue', alpha=0.2)

# Axes formatting
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
ax.set_title("4-Class Probability Simplex (Tetrahedron)")

# ax.view_init(elev=65, azim=20)

ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])

ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])

ax.xaxis.set_ticks_position('none')  # avoid drawing tick marks
ax.yaxis.set_ticks_position('none')
ax.zaxis.set_ticks_position('none')

ax.xaxis.pane.set_visible(False)
ax.yaxis.pane.set_visible(False)
ax.zaxis.pane.set_visible(False)

ax.set_xlabel('')
ax.set_ylabel('')
ax.set_zlabel('')

ax.grid(False)

plt.tight_layout()
# plt.show()
plt.savefig('plots_ieee/tetrahedron.pdf')

@ -0,0 +1,56 @@
import numpy as np
import pandas as pd

from quapy.method.aggregative import EMQ, KDEyML, PACC
from sklearn.linear_model import LogisticRegression

METHODS = ['PACC',
           'EMQ',
           'KDEy-ML',
           'KDEy-MLA'
           ]


# common hyperparameters
hyper_LR = {
    'classifier__C': np.logspace(-3, 3, 7),
    'classifier__class_weight': ['balanced', None]
}

hyper_kde = {
    'bandwidth': np.linspace(0.001, 0.5, 100)
}

hyper_kde_aitchison = {
    'bandwidth': np.linspace(0.01, 2, 100)
}

# instantiates a new quantifier based on a string name
def new_method(method, **lr_kwargs):
    lr = LogisticRegression(**lr_kwargs)

    if method == 'KDEy-ML':
        param_grid = {**hyper_kde, **hyper_LR}
        quantifier = KDEyML(lr, kernel='gaussian')
    elif method == 'KDEy-MLA':
        param_grid = {**hyper_kde_aitchison, **hyper_LR}
        quantifier = KDEyML(lr, kernel='aitchison')
    elif method == 'EMQ':
        param_grid = hyper_LR
        quantifier = EMQ(lr)
    elif method == 'PACC':
        param_grid = hyper_LR
        quantifier = PACC(lr)
    else:
        raise NotImplementedError('unknown method', method)

    return param_grid, quantifier

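# Usage sketch (illustrative): retrieve a quantifier and its hyperparameter grid by name; the name must be
# one of those listed in METHODS, and extra keyword arguments are passed down to LogisticRegression, e.g.:
# param_grid, quantifier = new_method('KDEy-ML', max_iter=3000)
# print(sorted(param_grid.keys()))
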

def show_results(result_path):
    df = pd.read_csv(result_path + '.csv', sep='\t')

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)

@ -0,0 +1,38 @@
import pickle
import os
import sys

import pandas as pd

import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from commons import METHODS, new_method, show_results
from new_table import LatexTable


SEED = 1


if __name__ == '__main__':
    print(qp.datasets.UCI_MULTICLASS_DATASETS)
    for optim in ['mae', 'mrae']:
        table = LatexTable()
        result_dir = f'results/ucimulti/{optim}'

        for method in METHODS:
            print()
            global_result_path = f'{result_dir}/{method}'
            print(f'Method\tDataset\tMAE\tMRAE\tKLD')
            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
                # print(dataset)
                local_result_path = global_result_path + '_' + dataset
                if os.path.exists(local_result_path + '.dataframe'):
                    report = pd.read_csv(local_result_path + '.dataframe')
                    print(f'{method}\t{dataset}\t{report[optim].mean():.5f}')
                    table.add(benchmark=dataset, method=method, v=report[optim].values)
                else:
                    print(dataset, 'not found for method', method)
        table.latexPDF(f'./tables/{optim}.pdf', landscape=False)

@ -0,0 +1,94 @@
import pickle
import os
import sys

import pandas as pd

import quapy as qp
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from commons import METHODS, new_method, show_results


SEED = 1


if __name__ == '__main__':

    qp.environ['SAMPLE_SIZE'] = 500
    qp.environ['N_JOBS'] = -1
    n_bags_val = 250
    n_bags_test = 1000
    for optim in ['mae', 'mrae']:
        result_dir = f'results/ucimulti/{optim}'

        os.makedirs(result_dir, exist_ok=True)

        for method in METHODS:

            print('Init method', method)

            global_result_path = f'{result_dir}/{method}'
            # show_results(global_result_path)
            # sys.exit(0)

            if not os.path.exists(global_result_path + '.csv'):
                with open(global_result_path + '.csv', 'wt') as csv:
                    csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')

            with open(global_result_path + '.csv', 'at') as csv:

                for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:

                    print('init', dataset)

                    local_result_path = global_result_path + '_' + dataset
                    if os.path.exists(local_result_path + '.dataframe'):
                        print(f'result file {local_result_path}.dataframe already exists; skipping')
                        report = pd.read_csv(local_result_path + '.dataframe')
                        print(report["mae"].mean())
                        # data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
                        # csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
                        continue

                    with qp.util.temp_seed(SEED):

                        param_grid, quantifier = new_method(method, max_iter=3000)

                        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)

                        # model selection
                        train, test = data.train_test
                        train, val = train.split_stratified(random_state=SEED)

                        protocol = UPP(val, repeats=n_bags_val)
                        modsel = GridSearchQ(
                            quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=True, error=optim
                        )

                        try:
                            modsel.fit(*train.Xy)

                            print(f'best params {modsel.best_params_}')
                            print(f'best score {modsel.best_score_}')
                            pickle.dump(
                                (modsel.best_params_, modsel.best_score_,),
                                open(f'{local_result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

                            quantifier = modsel.best_model()
                        except:
                            print('something went wrong... trying to fit the default model')
                            quantifier.fit(*train.Xy)

                        protocol = UPP(test, repeats=n_bags_test)
                        report = qp.evaluation.evaluation_report(
                            quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
                        )
                        report.to_csv(f'{local_result_path}.dataframe')
                        print(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
                        csv.write(f'{method}\t{data.name}\t{report["mae"].mean():.5f}\t{report["mrae"].mean():.5f}\t{report["kld"].mean():.5f}\n')
                        csv.flush()

            show_results(global_result_path)

@ -604,7 +604,10 @@ estim_prevalence = model.predict(dataset.test.X)

_(New in v0.2.0!)_ Some quantification methods go beyond providing a single point estimate of class prevalence values and also produce confidence regions, which characterize the uncertainty around the point estimate. In QuaPy, two such methods are currently implemented:

-* Aggregative Bootstrap: The Aggregative Bootstrap method extends any aggregative quantifier by generating confidence regions for class prevalence estimates through bootstrapping. Key features of this method include:
+* Aggregative Bootstrap: The Aggregative Bootstrap method extends any aggregative quantifier by generating confidence regions for class prevalence estimates through bootstrapping. The method is described in the paper [Moreo, A., Salvati, N.
+An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.
+Learning to Quantify: Methods and Applications (LQ 2025), co-located at ECML-PKDD 2025.
+pp 12-33, Porto (Portugal)](https://lq-2025.github.io/proceedings/CompleteVolume.pdf). Key features of this method include:

  * Optimized Computation: The bootstrap is applied to pre-classified instances, significantly speeding up training and inference.
    During training, bootstrap repetitions are performed only after training the classifier once. These repetitions are used to train multiple aggregation functions.

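A minimal usage sketch is shown below (assuming the wrapper is exposed as `AggregativeBootstrap` in `quapy.method.confidence`; the constructor arguments shown here are illustrative and may differ from the actual signature):

```python
import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC
from quapy.method.confidence import AggregativeBootstrap

train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

# wrap any aggregative quantifier; bootstrap repetitions reuse the pre-classified instances
quantifier = AggregativeBootstrap(PACC(LogisticRegression()), n_test_samples=500, confidence_level=0.95)
quantifier.fit(*train.Xy)

# point estimate plus a confidence region around it
pred_prev, conf_region = quantifier.predict_conf(test.X)
```
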
@ -60,6 +60,14 @@ quapy.method.composable module
   :undoc-members:
   :show-inheritance:

+quapy.method.confidence module
+------------------------------
+
+.. automodule:: quapy.method.confidence
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
Module contents
---------------

@ -36,7 +36,7 @@ with qp.util.temp_seed(0):
    true_prev = shifted_test.prevalence()

    # by calling "quantify_conf", we obtain the point estimate and the confidence intervals around it
-    pred_prev, conf_intervals = pacc.quantify_conf(shifted_test.X)
+    pred_prev, conf_intervals = pacc.predict_conf(shifted_test.X)

    # conf_intervals is an instance of ConfidenceRegionABC, which provides some useful utilities like:
    # - coverage: a function which computes the fraction of true values that belong to the confidence region

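    # A possible continuation (a sketch; the exact signature of coverage is assumed here): the true prevalence
    # can be checked against the returned region, e.g.
    #   covered = conf_intervals.coverage(true_prev)  # fraction of true values inside the region
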
@ -0,0 +1,60 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectKBest, chi2

import quapy as qp
from quapy.method.non_aggregative import ReadMe
import quapy.functional as F
from sklearn.pipeline import Pipeline

"""
This example showcases how to use the non-aggregative method ReadMe proposed by Hopkins and King.
This method is for text analysis, so let us first instantiate a dataset for sentiment quantification (we
use IMDb for this example). The method is quite computationally expensive, so we will restrict the training
set to 1000 documents only.
"""
reviews = qp.datasets.fetch_reviews('imdb').reduce(n_train=1000, random_state=0)

"""
We need to convert text into bag-of-words representations. Actually, ReadMe requires the representations to be
binary (i.e., storing a 1 whenever a document contains a certain word, or a 0 otherwise), so we will not use
TFIDF weighting. We will also retain only the top 1000 most important features according to chi2.
"""
encode_0_1 = Pipeline([
    ('0_1_terms', CountVectorizer(min_df=5, binary=True)),
    ('feat_sel', SelectKBest(chi2, k=1000))
])
train, test = qp.data.preprocessing.instance_transformation(reviews, encode_0_1, inplace=True).train_test

"""
We now instantiate ReadMe with prob_model='full' (the default behaviour, implementing Hopkins and King's original
idea). This method estimates Q(Y) by solving:

    Q(X) = \sum_i Q(X|Y=i) Q(Y=i)

without resorting to estimating the posteriors Q(Y=i|X), by solving a linear least-squares problem.
However, since Q(X) and Q(X|Y=i) are matrices of shape (2^K, 1) and (2^K, n), with K the number of features
and n the number of classes, their calculation becomes intractable. ReadMe instead performs bagging (i.e., it
samples small sets of features and averages the results), thus reducing K to a few terms. In our example we
set K (bagging_range) to 20, and the number of bagging_trials to 100.

ReadMe also computes confidence intervals via bootstrap. We set the number of bootstrap trials to 100.
"""
readme = ReadMe(prob_model='full', bootstrap_trials=100, bagging_trials=100, bagging_range=20, random_state=0, verbose=True)
readme.fit(*train.Xy)  # <- there is actually nothing happening here (only bootstrap resampling); the method is "lazy"
                       #    and postpones most of the calculations to the test phase

# since the method is slow, we will only test 3 cases with different imbalances
few_negatives = [0.25, 0.75]
balanced = [0.5, 0.5]
few_positives = [0.75, 0.25]

for test_prev in [few_negatives, balanced, few_positives]:
    sample = reviews.test.sampling(500, *test_prev, random_state=0)  # draw sets of 500 documents with the desired prevalence
    prev_estim, conf = readme.predict_conf(sample.X)
    err = qp.error.mae(sample.prevalence(), prev_estim)
    print(f'true-prevalence={F.strprev(sample.prevalence())},\n'
          f'predicted-prevalence={F.strprev(prev_estim)}, with confidence intervals {conf},\n'
          f'MAE={err:.4f}')

@ -0,0 +1,254 @@
from scipy.sparse import csc_matrix, csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
import numpy as np
from joblib import Parallel, delayed
import sklearn.preprocessing
import math
from scipy.stats import t


class ContTable:
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
        self.tp = tp
        self.tn = tn
        self.fp = fp
        self.fn = fn

    def get_d(self): return self.tp + self.tn + self.fp + self.fn

    def get_c(self): return self.tp + self.fn

    def get_not_c(self): return self.tn + self.fp

    def get_f(self): return self.tp + self.fp

    def get_not_f(self): return self.tn + self.fn

    def p_c(self): return (1.0*self.get_c())/self.get_d()

    def p_not_c(self): return 1.0-self.p_c()

    def p_f(self): return (1.0*self.get_f())/self.get_d()

    def p_not_f(self): return 1.0-self.p_f()

    def p_tp(self): return (1.0*self.tp) / self.get_d()

    def p_tn(self): return (1.0*self.tn) / self.get_d()

    def p_fp(self): return (1.0*self.fp) / self.get_d()

    def p_fn(self): return (1.0*self.fn) / self.get_d()

    def tpr(self):
        c = 1.0*self.get_c()
        return self.tp / c if c > 0.0 else 0.0

    def fpr(self):
        _c = 1.0*self.get_not_c()
        return self.fp / _c if _c > 0.0 else 0.0


def __ig_factor(p_tc, p_t, p_c):
    den = p_t * p_c
    if den != 0.0 and p_tc != 0:
        return p_tc * math.log(p_tc / den, 2)
    else:
        return 0.0


def information_gain(cell):
    return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \
           __ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) + \
           __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \
           __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())


def squared_information_gain(cell):
    return information_gain(cell)**2


def posneg_information_gain(cell):
    ig = information_gain(cell)
    if cell.tpr() < cell.fpr():
        return -ig
    else:
        return ig


def pos_information_gain(cell):
    if cell.tpr() < cell.fpr():
        return 0
    else:
        return information_gain(cell)


def pointwise_mutual_information(cell):
    return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())


def gss(cell):
    return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn()


def chi_square(cell):
    den = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
    if den == 0.0: return 0.0
    num = gss(cell)**2
    return num / den


def conf_interval(xt, n):
    if n > 30:
        z2 = 3.84145882069  # norm.ppf(0.5+0.95/2.0)**2
    else:
        z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1, 1)) ** 2
    p = (xt + 0.5 * z2) / (n + z2)
    amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2))
    return p, amplitude


def strength(minPosRelFreq, minPos, maxNeg):
    if minPos > maxNeg:
        return math.log(2.0 * minPosRelFreq, 2.0)
    else:
        return 0.0


# set cancel_features=True to allow some features to be weighted as 0 (as in the original article);
# however, for some extremely imbalanced datasets this caused all documents to be 0
def conf_weight(cell, cancel_features=False):
    c = cell.get_c()
    not_c = cell.get_not_c()
    tp = cell.tp
    fp = cell.fp

    pos_p, pos_amp = conf_interval(tp, c)
    neg_p, neg_amp = conf_interval(fp, not_c)

    min_pos = pos_p - pos_amp
    max_neg = neg_p + neg_amp
    den = (min_pos + max_neg)
    minpos_relfreq = min_pos / (den if den != 0 else 1)

    str_tplus = strength(minpos_relfreq, min_pos, max_neg)

    if str_tplus == 0 and not cancel_features:
        return 1e-20

    return str_tplus


def get_tsr_matrix(cell_matrix, tsr_score_function):
    nC = len(cell_matrix)
    nF = len(cell_matrix[0])
    tsr_matrix = [[tsr_score_function(cell_matrix[c, f]) for f in range(nF)] for c in range(nC)]
    return np.array(tsr_matrix)


def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
    tp_ = len(positive_document_indexes & feature_document_indexes)
    fp_ = len(feature_document_indexes - positive_document_indexes)
    fn_ = len(positive_document_indexes - feature_document_indexes)
    tn_ = nD - (tp_ + fp_ + fn_)
    return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_)


def category_tables(feature_sets, category_sets, c, nD, nF):
    return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)]


def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
    """
    Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c.
    Efficiency O(nF x nC x log(S)) where S is the sparse factor
    """

    nD, nF = coocurrence_matrix.shape
    nD2, nC = label_matrix.shape

    if nD != nD2:
        raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
                         (coocurrence_matrix.shape, label_matrix.shape))

    def nonzero_set(matrix, col):
        return set(matrix[:, col].nonzero()[0])

    if isinstance(coocurrence_matrix, csr_matrix):
        coocurrence_matrix = csc_matrix(coocurrence_matrix)
    feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)]
    category_sets = [nonzero_set(label_matrix, c) for c in range(nC)]
    cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(
        delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC)
    )
    return np.array(cell_matrix)


class TSRweighting(BaseEstimator, TransformerMixin):
    """
    Supervised Term Weighting function based on any Term Selection Reduction (TSR) function (e.g., information gain,
    chi-square, etc.) or, more generally, on any function that could be computed on the 4-cell contingency table for
    each category-feature pair.
    The supervised_4cell_matrix is a `(n_classes, n_words)` matrix containing the 4-cell contingency tables
    for each class-word pair, and can be pre-computed (e.g., during the feature selection phase) and passed as an
    argument.
    When `n_classes>1`, i.e., in multiclass scenarios, a global_policy is used in order to determine a
    single feature score that informs about its relevance. Accepted policies include "max" (takes the max score
    across categories), "ave" and "wave" (take the average, or weighted average, across all categories -- weights
    correspond to the class prevalence), and "sum" (which sums all category scores).
    """

    def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
        if global_policy not in ['max', 'ave', 'wave', 'sum']:
            raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
        self.tsr_function = tsr_function
        self.global_policy = global_policy
        self.supervised_4cell_matrix = supervised_4cell_matrix
        self.sublinear_tf = sublinear_tf
        self.norm = norm
        self.min_df = min_df
        self.n_jobs = n_jobs

    def fit(self, X, y):
        self.count_vectorizer = CountVectorizer(min_df=self.min_df)
        X = self.count_vectorizer.fit_transform(X)

        self.tf_vectorizer = TfidfTransformer(
            norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf
        ).fit(X)

        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)

        nD, nC = y.shape
        nF = len(self.tf_vectorizer.get_feature_names_out())

        if self.supervised_4cell_matrix is None:
            self.supervised_4cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
        else:
            if self.supervised_4cell_matrix.shape != (nC, nF):
                raise ValueError("Shape of supervised information matrix is inconsistent with X and y")

        tsr_matrix = get_tsr_matrix(self.supervised_4cell_matrix, self.tsr_function)

        if self.global_policy == 'ave':
            self.global_tsr_vector = np.average(tsr_matrix, axis=0)
        elif self.global_policy == 'wave':
            category_prevalences = [sum(y[:, c])*1.0/nD for c in range(nC)]
            self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
        elif self.global_policy == 'sum':
            self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
        elif self.global_policy == 'max':
            self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
        return self

    def fit_transform(self, X, y):
        return self.fit(X, y).transform(X)

    def transform(self, X):
        if not hasattr(self, 'global_tsr_vector'):
            raise NameError('TSRweighting: transform method called before fit.')
        X = self.count_vectorizer.transform(X)
        tf_X = self.tf_vectorizer.transform(X).toarray()
        weighted_X = np.multiply(tf_X, self.global_tsr_vector)
        if self.norm is not None and self.norm != 'none':
            weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
        return csr_matrix(weighted_X)

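# Usage sketch of TSRweighting (illustrative only; the toy corpus below is made up). It learns
# information-gain-based supervised term weights from a labelled corpus and re-weights new documents:
# if __name__ == '__main__':
#     docs = ['good movie', 'great film', 'bad movie', 'awful film']
#     labels = np.array([1, 1, 0, 0])
#     tsr = TSRweighting(tsr_function=information_gain, min_df=1)
#     Xw = tsr.fit_transform(docs, labels)
#     print(Xw.shape)
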
@ -0,0 +1,208 @@
|
||||||
|
from scipy.sparse import issparse
|
||||||
|
from sklearn.decomposition import TruncatedSVD
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
from data import LabelledCollection
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from experimental_non_aggregative.custom_vectorizers import *
|
||||||
|
from method._kdey import KDEBase
|
||||||
|
from protocol import APP
|
||||||
|
from quapy.method.aggregative import HDy, DistributionMatchingY
|
||||||
|
from quapy.method.base import BaseQuantifier
|
||||||
|
from scipy import optimize
|
||||||
|
import pandas as pd
|
||||||
|
import quapy.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: explore the bernoulli (term presence/absence) variant
|
||||||
|
# TODO: explore the multinomial (term frequency) variant
|
||||||
|
# TODO: explore the multinomial + length normalization variant
|
||||||
|
# TODO: consolidate the TSR-variant (e.g., using information gain) variant;
|
||||||
|
# - works better with the idf?
|
||||||
|
# - works better with length normalization?
|
||||||
|
# - etc
|
||||||
|
|
||||||
|
class DxS(BaseQuantifier):
|
||||||
|
def __init__(self, vectorizer=None, divergence='topsoe'):
|
||||||
|
self.vectorizer = vectorizer
|
||||||
|
self.divergence = divergence
|
||||||
|
|
||||||
|
# def __as_distribution(self, instances):
|
||||||
|
# return np.asarray(instances.sum(axis=0) / instances.sum()).flatten()
|
||||||
|
|
||||||
|
def __as_distribution(self, instances):
|
||||||
|
dist = instances.mean(axis=0)
|
||||||
|
return np.asarray(dist).flatten()
|
||||||
|
|
||||||
|
def fit(self, text_instances, labels):
|
||||||
|
|
||||||
|
classes = np.unique(labels)
|
||||||
|
|
||||||
|
if self.vectorizer is not None:
|
||||||
|
text_instances = self.vectorizer.fit_transform(text_instances, y=labels)
|
||||||
|
|
||||||
|
distributions = []
|
||||||
|
for class_i in classes:
|
||||||
|
distributions.append(self.__as_distribution(text_instances[labels == class_i]))
|
||||||
|
|
||||||
|
self.validation_distribution = np.asarray(distributions)
|
||||||
|
|
||||||
|
return self
|
||||||
|
|
||||||
|
def predict(self, text_instances):
|
||||||
|
if self.vectorizer is not None:
|
||||||
|
text_instances = self.vectorizer.transform(text_instances)
|
||||||
|
|
||||||
|
test_distribution = self.__as_distribution(text_instances)
|
||||||
|
divergence = qp.functional.get_divergence(self.divergence)
|
||||||
|
n_classes, n_feats = self.validation_distribution.shape
|
||||||
|
|
||||||
|
def match(prev):
|
||||||
|
prev = np.expand_dims(prev, axis=0)
|
||||||
|
mixture_distribution = (prev @ self.validation_distribution).flatten()
|
||||||
|
return divergence(test_distribution, mixture_distribution)
|
||||||
|
|
||||||
|
# the initial point is set as the uniform distribution
|
||||||
|
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
|
||||||
|
|
||||||
|
# solutions are bounded to those contained in the unit-simplex
|
||||||
|
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
|
||||||
|
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
|
||||||
|
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
|
||||||
|
return r.x
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class KDExML(BaseQuantifier, KDEBase):
|
||||||
|
|
||||||
|
def __init__(self, bandwidth=0.1, standardize=False):
|
||||||
|
self._check_bandwidth(bandwidth)
|
||||||
|
self.bandwidth = bandwidth
|
||||||
|
self.standardize = standardize
|
||||||
|
|
||||||
|
def fit(self, X, y):
|
||||||
|
classes = sorted(np.unique(y))
|
||||||
|
|
||||||
|
if self.standardize:
|
||||||
|
self.scaler = StandardScaler()
|
||||||
|
X = self.scaler.fit_transform(X)
|
||||||
|
|
||||||
|
if issparse(X):
|
||||||
|
X = X.toarray()
|
||||||
|
|
||||||
|
self.mix_densities = self.get_mixture_components(X, y, classes, self.bandwidth)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def predict(self, X):
|
||||||
|
"""
|
||||||
|
Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood
|
||||||
|
of the data (i.e., that minimizes the negative log-likelihood)
|
||||||
|
|
||||||
|
:param X: instances in the sample
|
||||||
|
:return: a vector of class prevalence estimates
|
||||||
|
"""
|
||||||
|
epsilon = 1e-10
|
||||||
|
if issparse(X):
|
||||||
|
X = X.toarray()
|
||||||
|
n_classes = len(self.mix_densities)
|
||||||
|
if self.standardize:
|
||||||
|
X = self.scaler.transform(X)
|
||||||
|
test_densities = [self.pdf(kde_i, X) for kde_i in self.mix_densities]
|
||||||
|
|
||||||
|
def neg_loglikelihood(prev):
|
||||||
|
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
|
||||||
|
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
||||||
|
return -np.sum(test_loglikelihood)
|
||||||
|
|
||||||
|
return F.optim_minimize(neg_loglikelihood, n_classes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 250
|
||||||
|
qp.environ['N_JOBS'] = -1
|
||||||
|
min_df = 10
|
||||||
|
# dataset = 'imdb'
|
||||||
|
repeats = 10
|
||||||
|
error = 'mae'
|
||||||
|
|
||||||
|
div = 'topsoe'
|
||||||
|
|
||||||
|
# generates tuples (dataset, method, method_name)
|
||||||
|
# (the dataset is needed for methods that process the dataset differently)
|
||||||
|
def gen_methods():
|
||||||
|
|
||||||
|
for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
|
||||||
|
|
||||||
|
data = qp.datasets.fetch_reviews(dataset, tfidf=False)
|
||||||
|
|
||||||
|
# bernoulli_vectorizer = CountVectorizer(min_df=min_df, binary=True)
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=bernoulli_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-Bernoulli'
|
||||||
|
#
|
||||||
|
# multinomial_vectorizer = CountVectorizer(min_df=min_df, binary=False)
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=multinomial_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-multinomial'
|
||||||
|
#
|
||||||
|
# tf_vectorizer = TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=tf_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-TF'
|
||||||
|
#
|
||||||
|
# logtf_vectorizer = TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=logtf_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-logTF'
|
||||||
|
#
|
||||||
|
# tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-TFIDF'
|
||||||
|
#
|
||||||
|
# tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')
|
||||||
|
# dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
|
||||||
|
# yield data, dxs, 'DxS-TFIDF-l2'
|
||||||
|
|
||||||
|
tsr_vectorizer = TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')
|
||||||
|
dxs = DxS(divergence=div, vectorizer=tsr_vectorizer)
|
||||||
|
yield data, dxs, 'DxS-TFTSR-l2'
|
||||||
|
|
||||||
|
data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
|
||||||
|
|
||||||
|
kdex = KDExML()
|
||||||
|
reduction = TruncatedSVD(n_components=100, random_state=0)
|
||||||
|
red_data = qp.data.preprocessing.instance_transformation(data, transformer=reduction, inplace=False)
|
||||||
|
yield red_data, kdex, 'KDEx'
|
||||||
|
|
||||||
|
hdy = HDy(LogisticRegression())
|
||||||
|
yield data, hdy, 'HDy'
|
||||||
|
|
||||||
|
# dm = DistributionMatchingY(LogisticRegression(), divergence=div, nbins=5)
|
||||||
|
# yield data, dm, 'DM-5b'
|
||||||
|
#
|
||||||
|
# dm = DistributionMatchingY(LogisticRegression(), divergence=div, nbins=10)
|
||||||
|
# yield data, dm, 'DM-10b'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
result_path = 'results.csv'
with open(result_path, 'wt') as csv:
    csv.write('Method\tDataset\tMAE\tMRAE\n')
    for data, quantifier, quant_name in gen_methods():
        quantifier.fit(*data.training.Xy)
        report = qp.evaluation.evaluation_report(
            quantifier, APP(data.test, repeats=repeats), error_metrics=['mae', 'mrae'], verbose=True)
        means = report.mean(numeric_only=True)
        csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')

df = pd.read_csv(result_path, sep='\t')
# print(df)
pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
print(pv)

@@ -13,7 +13,7 @@ from . import model_selection
 from . import classification
 import os
 
-__version__ = '0.2.0'
+__version__ = '0.2.1'
 
 
 def _default_cls():
@@ -33,7 +33,6 @@ class LabelledCollection:
         else:
             self.instances = np.asarray(instances)
         self.labels = np.asarray(labels)
-        n_docs = len(self)
         if classes is None:
             self.classes_ = F.classes_from_labels(self.labels)
         else:
@@ -41,7 +40,13 @@ class LabelledCollection:
             self.classes_.sort()
             if len(set(self.labels).difference(set(classes))) > 0:
                 raise ValueError(f'labels ({set(self.labels)}) contain values not included in classes_ ({set(classes)})')
-        self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_}
+        self._index = None
+
+    @property
+    def index(self):
+        if not hasattr(self, '_index') or self._index is None:
+            self._index = {class_: np.arange(len(self))[self.labels == class_] for class_ in self.classes_}
+        return self._index
 
     @classmethod
     def load(cls, path: str, loader_func: callable, classes=None, **loader_kwargs):
@ -10,6 +10,37 @@ from quapy.util import map_parallel
|
||||||
from .base import LabelledCollection
|
from .base import LabelledCollection
|
||||||
|
|
||||||
|
|
||||||
|
def instance_transformation(dataset:Dataset, transformer, inplace=False):
|
||||||
|
"""
|
||||||
|
Transforms a :class:`quapy.data.base.Dataset` applying the `fit_transform` and `transform` functions
|
||||||
|
of a (sklearn's) transformer.
|
||||||
|
|
||||||
|
:param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are
|
||||||
|
lists of str
|
||||||
|
:param transformer: TransformerMixin implementing `fit_transform` and `transform` functions
|
||||||
|
:param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)
|
||||||
|
:return: a new :class:`quapy.data.base.Dataset` with transformed instances (if inplace=False) or a reference to the
|
||||||
|
current Dataset (if inplace=True) where the instances have been transformed
|
||||||
|
"""
|
||||||
|
training_transformed = transformer.fit_transform(*dataset.training.Xy)
|
||||||
|
test_transformed = transformer.transform(dataset.test.X)
|
||||||
|
orig_name = dataset.name
|
||||||
|
|
||||||
|
if inplace:
|
||||||
|
dataset.training = LabelledCollection(training_transformed, dataset.training.labels, dataset.classes_)
|
||||||
|
dataset.test = LabelledCollection(test_transformed, dataset.test.labels, dataset.classes_)
|
||||||
|
if hasattr(transformer, 'vocabulary_'):
|
||||||
|
dataset.vocabulary = transformer.vocabulary_
|
||||||
|
return dataset
|
||||||
|
else:
|
||||||
|
training = LabelledCollection(training_transformed, dataset.training.labels.copy(), dataset.classes_)
|
||||||
|
test = LabelledCollection(test_transformed, dataset.test.labels.copy(), dataset.classes_)
|
||||||
|
vocab = None
|
||||||
|
if hasattr(transformer, 'vocabulary_'):
|
||||||
|
vocab = transformer.vocabulary_
|
||||||
|
return Dataset(training, test, vocabulary=vocab, name=orig_name)
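# A usage sketch (not part of the module; the dataset choice and the helper name are illustrative
# assumptions) showing how `instance_transformation` wraps any scikit-learn transformer, here a
# TruncatedSVD reduction, and returns a new Dataset with transformed instances.
def _instance_transformation_sketch():
    import quapy as qp
    from sklearn.decomposition import TruncatedSVD

    data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10)  # hypothetical dataset choice
    svd = TruncatedSVD(n_components=100, random_state=0)
    reduced = instance_transformation(data, transformer=svd, inplace=False)
    # with inplace=False the original dataset is left untouched
    return reduced.training.X.shape, data.training.X.shape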
|
||||||
|
|
||||||
|
|
||||||
def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
|
def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
|
||||||
"""
|
"""
|
||||||
Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of
|
Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of
|
||||||
|
|
@ -29,18 +60,7 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
|
||||||
__check_type(dataset.test.instances, np.ndarray, str)
|
__check_type(dataset.test.instances, np.ndarray, str)
|
||||||
|
|
||||||
vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
|
vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
|
||||||
training_documents = vectorizer.fit_transform(dataset.training.instances)
|
return instance_transformation(dataset, vectorizer, inplace)
|
||||||
test_documents = vectorizer.transform(dataset.test.instances)
|
|
||||||
|
|
||||||
if inplace:
|
|
||||||
dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.classes_)
|
|
||||||
dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.classes_)
|
|
||||||
dataset.vocabulary = vectorizer.vocabulary_
|
|
||||||
return dataset
|
|
||||||
else:
|
|
||||||
training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.classes_)
|
|
||||||
test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.classes_)
|
|
||||||
return Dataset(training, test, vectorizer.vocabulary_)
|
|
||||||
|
|
||||||
|
|
||||||
def reduce_columns(dataset: Dataset, min_df=5, inplace=False):
|
def reduce_columns(dataset: Dataset, min_df=5, inplace=False):
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.metrics import f1_score
|
from sklearn.metrics import f1_score
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from functional import CLRtransformation
|
||||||
|
|
||||||
|
|
||||||
def from_name(err_name):
|
def from_name(err_name):
|
||||||
|
|
@ -128,6 +129,59 @@ def se(prevs_true, prevs_hat):
|
||||||
return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)
|
return ((prevs_hat - prevs_true) ** 2).mean(axis=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
    """
    The squared ratio error is defined as
    :math:`SRE(p,\\hat{p},p^{tr})=\\frac{1}{|\\mathcal{Y}|}\\sum_{i \\in \\mathcal{Y}}(w_i-\\hat{w}_i)^2`,
    where
    :math:`w_i=\\frac{p_i}{p^{tr}_i}=\\frac{Q(Y=i)}{P(Y=i)}`,
    :math:`\\hat{w}_i` is the estimate obtained by replacing the true test prevalence with its estimate, and
    :math:`\\mathcal{Y}` is the set of classes of interest.

    :param prevs_true: array-like, true prevalence values
    :param prevs_hat: array-like, estimated prevalence values
    :param prevs_train: array-like, training prevalence values
    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
        training prevalence is expected to be >0 everywhere.
    :return: the squared ratio error
    """
    prevs_true = np.asarray(prevs_true)
    prevs_hat = np.asarray(prevs_hat)
    prevs_train = np.asarray(prevs_train)
    assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
    assert prevs_true.shape[-1] == prevs_train.shape[-1], 'wrong shape for training prevalence'
    if eps > 0:
        prevs_true = smooth(prevs_true, eps)
        prevs_hat = smooth(prevs_hat, eps)
        prevs_train = smooth(prevs_train, eps)

    N = prevs_true.shape[-1]
    w = prevs_true / prevs_train
    w_hat = prevs_hat / prevs_train
    # average of the squared differences between the true and the estimated ratios, across classes
    return (1. / N) * np.sum((w - w_hat) ** 2, axis=-1)
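
# A toy computation (not part of the module; the numbers are illustrative assumptions) showing the
# squared ratio error: the class-wise ratios w_i = p_i / p^tr_i of the true and the estimated test
# prevalence are compared against each other.
def _sre_sketch():
    import numpy as np
    p_train = np.asarray([0.5, 0.3, 0.2])
    p_true = np.asarray([0.2, 0.3, 0.5])
    p_hat = np.asarray([0.25, 0.35, 0.40])
    # w = [0.4, 1.0, 2.5], w_hat = [0.5, 1.1667, 2.0]
    # SRE = ((0.1)^2 + (0.1667)^2 + (0.5)^2) / 3, i.e., roughly 0.096
    return sre(p_true, p_hat, p_train)
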
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
    """
    Returns the mean squared ratio error across all experiments. See :func:`sre`.

    :param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
    :param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
    :param prevs_train: array-like, training prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
    :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
        training prevalence is expected to be >0 everywhere.
    :return: the mean squared ratio error
    """
    return np.mean(sre(prevs_true, prevs_hat, prevs_train, eps))
|
||||||
|
|
||||||
|
|
||||||
|
def dist_aitchison(prevs_true, prevs_hat):
|
||||||
|
clr = CLRtransformation()
|
||||||
|
return np.linalg.norm(clr(prevs_true) - clr(prevs_hat), axis=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def mean_dist_aitchison(prevs_true, prevs_hat):
|
||||||
|
return np.mean(dist_aitchison(prevs_true, prevs_hat))
|
||||||
|
|
||||||
|
|
||||||
def mkld(prevs_true, prevs_hat, eps=None):
|
def mkld(prevs_true, prevs_hat, eps=None):
|
||||||
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
|
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
|
||||||
sample pairs. The distributions are smoothed using the `eps` factor
|
sample pairs. The distributions are smoothed using the `eps` factor
|
||||||
|
|
@ -374,8 +428,8 @@ def __check_eps(eps=None):
|
||||||
|
|
||||||
|
|
||||||
CLASSIFICATION_ERROR = {f1e, acce}
|
CLASSIFICATION_ERROR = {f1e, acce}
|
||||||
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
|
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, msre, mean_dist_aitchison}
|
||||||
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
|
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, sre, dist_aitchison}
|
||||||
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
|
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
|
||||||
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
||||||
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
||||||
|
|
@ -387,6 +441,7 @@ ERROR_NAMES = \
|
||||||
f1_error = f1e
|
f1_error = f1e
|
||||||
acc_error = acce
|
acc_error = acce
|
||||||
mean_absolute_error = mae
|
mean_absolute_error = mae
|
||||||
|
squared_ratio_error = sre
|
||||||
absolute_error = ae
|
absolute_error = ae
|
||||||
mean_relative_absolute_error = mrae
|
mean_relative_absolute_error = mrae
|
||||||
relative_absolute_error = rae
|
relative_absolute_error = rae
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,15 @@
|
||||||
import warnings
|
import warnings
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from functools import lru_cache
|
||||||
from typing import Literal, Union, Callable
|
from typing import Literal, Union, Callable
|
||||||
from numpy.typing import ArrayLike
|
from numpy.typing import ArrayLike
|
||||||
|
|
||||||
import scipy
|
import scipy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from scipy.special import softmax
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------------------
|
||||||
|
|
@ -583,8 +588,8 @@ def solve_adjustment(
|
||||||
"""
|
"""
|
||||||
Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of
|
Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of
|
||||||
`unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of
|
`unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of
|
||||||
:math:`P(\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
|
:math:`P(\\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an
|
||||||
estimate of :math:`P(\hat{Y}=y_i|Y=y_j)`.
|
estimate of :math:`P(\\hat{Y}=y_i|Y=y_j)`.
|
||||||
|
|
||||||
:param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate
|
:param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate
|
||||||
of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j`
|
of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j`
|
||||||
|
|
@ -649,3 +654,96 @@ def solve_adjustment(
|
||||||
raise ValueError(f'unknown {solver=}')
|
raise ValueError(f'unknown {solver=}')
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------------------
|
||||||
|
# Transformations from Compositional analysis
|
||||||
|
# ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class CompositionalTransformation(ABC):
    """
    Abstract class of transformations for compositional data.
    Basically, callable functions with an "inverse" function.
    """

    EPSILON = 1e-6  # small constant used for smoothing compositions before taking logs

    @abstractmethod
    def __call__(self, X): ...

    @abstractmethod
    def inverse(self, Z): ...

class CLRtransformation(CompositionalTransformation):
    """
    Centered log-ratio (CLR) transformation, from compositional analysis.
    """
    def __call__(self, X):
        """
        Applies the CLR function to X, thus mapping the instances, which are contained in :math:`\\mathcal{R}^{n}`
        but actually lie on a :math:`\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`.
        The compositions are smoothed with a small epsilon before taking logs.

        :param X: np.ndarray of (n_instances, n_dimensions) to be transformed
        :return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
        """
        X = np.asarray(X)
        X = qp.error.smooth(X, self.EPSILON)
        G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True))  # geometric mean
        return np.log(X / G)

    def inverse(self, Z):
        """
        Inverse function. Note that clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.

        :param Z: np.ndarray of (n_instances, n_dimensions) to be inverse-transformed
        :return: np.ndarray of (n_instances, n_dimensions), the compositions in the probability simplex
        """
        return scipy.special.softmax(Z, axis=-1)
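
# A round-trip sketch (not part of the module; the helper name is an illustrative assumption):
# CLR maps a prevalence vector to unconstrained coordinates, and softmax approximately inverts it
# (exactly up to the epsilon-smoothing applied before taking logs).
def _clr_sketch():
    import numpy as np
    clr = CLRtransformation()
    p = np.asarray([0.2, 0.5, 0.3])
    z = clr(p)               # unconstrained coordinates, summing to (approximately) 0
    p_back = clr.inverse(z)  # close to p, up to smoothing
    return np.allclose(p, p_back, atol=1e-4)
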
|
||||||
|
|
||||||
|
|
||||||
|
class ILRtransformation(CompositionalTransformation):
|
||||||
|
"""
|
||||||
|
Isometric log-ratio (ILR), from compositional analysis
|
||||||
|
"""
|
||||||
|
def __call__(self, X):
|
||||||
|
X = np.asarray(X)
|
||||||
|
X = qp.error.smooth(X, self.EPSILON)
|
||||||
|
k = X.shape[-1]
|
||||||
|
V = self.get_V(k) # (k-1, k)
|
||||||
|
logp = np.log(X)
|
||||||
|
return logp @ V.T
|
||||||
|
|
||||||
|
def inverse(self, Z):
|
||||||
|
Z = np.asarray(Z)
|
||||||
|
# get dimension
|
||||||
|
k_minus_1 = Z.shape[-1]
|
||||||
|
k = k_minus_1 + 1
|
||||||
|
V = self.get_V(k) # (k-1, k)
|
||||||
|
logp = Z @ V
|
||||||
|
p = np.exp(logp)
|
||||||
|
p = p / np.sum(p, axis=-1, keepdims=True)
|
||||||
|
return p
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
|
||||||
|
def get_V(self, k):
|
||||||
|
def helmert_matrix(k):
|
||||||
|
"""
|
||||||
|
Returns the (k x k) Helmert matrix.
|
||||||
|
"""
|
||||||
|
H = np.zeros((k, k))
|
||||||
|
for i in range(1, k):
|
||||||
|
H[i, :i] = 1
|
||||||
|
H[i, i] = -(i)
|
||||||
|
H[i] = H[i] / np.sqrt(i * (i + 1))
|
||||||
|
# row 0 stays zeros; will be discarded
|
||||||
|
return H
|
||||||
|
|
||||||
|
def ilr_basis(k):
|
||||||
|
"""
|
||||||
|
Constructs an orthonormal ILR basis using the Helmert submatrix.
|
||||||
|
Output shape: (k-1, k)
|
||||||
|
"""
|
||||||
|
H = helmert_matrix(k)
|
||||||
|
V = H[1:, :] # remove first row of zeros
|
||||||
|
return V
|
||||||
|
|
||||||
|
return ilr_basis(k)
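# A round-trip sketch (not part of the module; the helper name is an illustrative assumption):
# ILR maps a k-dimensional composition to k-1 unconstrained coordinates through the orthonormal
# Helmert sub-basis, and inverse() recovers the (smoothed) composition.
def _ilr_sketch():
    import numpy as np
    ilr = ILRtransformation()
    p = np.asarray([0.1, 0.2, 0.3, 0.4])
    z = ilr(p)               # shape (3,): unconstrained coordinates
    p_back = ilr.inverse(z)  # shape (4,): recovers p up to epsilon-smoothing
    return z.shape, np.allclose(p, p_back, atol=1e-4)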
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ AGGREGATIVE_METHODS = {
|
||||||
aggregative.KDEyHD,
|
aggregative.KDEyHD,
|
||||||
# aggregative.OneVsAllAggregative,
|
# aggregative.OneVsAllAggregative,
|
||||||
confidence.BayesianCC,
|
confidence.BayesianCC,
|
||||||
|
confidence.PQ,
|
||||||
}
|
}
|
||||||
|
|
||||||
BINARY_METHODS = {
|
BINARY_METHODS = {
|
||||||
|
|
@ -40,6 +41,7 @@ BINARY_METHODS = {
|
||||||
aggregative.MAX,
|
aggregative.MAX,
|
||||||
aggregative.MS,
|
aggregative.MS,
|
||||||
aggregative.MS2,
|
aggregative.MS2,
|
||||||
|
confidence.PQ,
|
||||||
}
|
}
|
||||||
|
|
||||||
MULTICLASS_METHODS = {
|
MULTICLASS_METHODS = {
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,21 @@
|
||||||
"""
|
"""
|
||||||
Utility functions for `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ methods.
|
Utility functions for `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ methods.
|
||||||
"""
|
"""
|
||||||
|
import contextlib
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import importlib.resources
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import jax
|
import jax
|
||||||
import jax.numpy as jnp
|
import jax.numpy as jnp
|
||||||
import numpyro
|
import numpyro
|
||||||
import numpyro.distributions as dist
|
import numpyro.distributions as dist
|
||||||
|
import stan
|
||||||
|
import logging
|
||||||
|
import stan.common
|
||||||
|
|
||||||
DEPENDENCIES_INSTALLED = True
|
DEPENDENCIES_INSTALLED = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
@ -15,6 +23,7 @@ except ImportError:
|
||||||
jnp = None
|
jnp = None
|
||||||
numpyro = None
|
numpyro = None
|
||||||
dist = None
|
dist = None
|
||||||
|
stan = None
|
||||||
|
|
||||||
DEPENDENCIES_INSTALLED = False
|
DEPENDENCIES_INSTALLED = False
|
||||||
|
|
||||||
|
|
@ -77,3 +86,71 @@ def sample_posterior(
|
||||||
rng_key = jax.random.PRNGKey(seed)
|
rng_key = jax.random.PRNGKey(seed)
|
||||||
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled)
|
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled)
|
||||||
return mcmc.get_samples()
|
return mcmc.get_samples()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_stan_file():
|
||||||
|
return importlib.resources.files('quapy.method').joinpath('stan/pq.stan').read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def _suppress_stan_logging():
|
||||||
|
with open(os.devnull, "w") as devnull:
|
||||||
|
old_stderr = sys.stderr
|
||||||
|
sys.stderr = devnull
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
sys.stderr = old_stderr
|
||||||
|
|
||||||
|
|
||||||
|
def pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist, number_of_samples, num_warmup, stan_seed):
|
||||||
|
"""
|
||||||
|
Perform Bayesian prevalence estimation using a Stan model for probabilistic quantification.
|
||||||
|
|
||||||
|
This function builds and samples from a Stan model that implements a bin-based Bayesian
|
||||||
|
quantifier. It uses the class-conditional histograms of the classifier
|
||||||
|
outputs for positive and negative examples, along with the test histogram, to estimate
|
||||||
|
the posterior distribution of prevalence in the test set.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
stan_code : str
|
||||||
|
The Stan model code as a string.
|
||||||
|
n_bins : int
|
||||||
|
Number of bins used to build the histograms for positive and negative examples.
|
||||||
|
pos_hist : array-like of shape (n_bins,)
|
||||||
|
Histogram counts of the classifier outputs for the positive class.
|
||||||
|
neg_hist : array-like of shape (n_bins,)
|
||||||
|
Histogram counts of the classifier outputs for the negative class.
|
||||||
|
test_hist : array-like of shape (n_bins,)
|
||||||
|
Histogram counts of the classifier outputs for the test set, binned using the same bins.
|
||||||
|
number_of_samples : int
|
||||||
|
Number of post-warmup samples to draw from the Stan posterior.
|
||||||
|
num_warmup : int
|
||||||
|
Number of warmup iterations for the sampler.
|
||||||
|
stan_seed : int
|
||||||
|
Random seed for Stan model compilation and sampling, ensuring reproducibility.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
prev_samples : numpy.ndarray
|
||||||
|
An array of posterior samples of the prevalence (`prev`) in the test set.
|
||||||
|
Each element corresponds to one draw from the posterior distribution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
logging.getLogger("stan.common").setLevel(logging.ERROR)
|
||||||
|
|
||||||
|
stan_data = {
|
||||||
|
'n_bucket': n_bins,
|
||||||
|
'train_neg': neg_hist.tolist(),
|
||||||
|
'train_pos': pos_hist.tolist(),
|
||||||
|
'test': test_hist.tolist(),
|
||||||
|
'posterior': 1
|
||||||
|
}
|
||||||
|
|
||||||
|
with _suppress_stan_logging():
|
||||||
|
stan_model = stan.build(stan_code, data=stan_data, random_seed=stan_seed)
|
||||||
|
fit = stan_model.sample(num_chains=1, num_samples=number_of_samples,num_warmup=num_warmup)
|
||||||
|
|
||||||
|
return fit['prev']
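# A usage sketch (not part of the module; it assumes the optional `stan` dependency is installed,
# the bundled pq.stan model is available, and the toy beta-distributed scores stand in for real
# classifier outputs): bin the validation positives, validation negatives and the test scores into
# the same histogram and sample the posterior prevalence.
def _pq_stan_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    n_bins = 10
    bins = np.linspace(0., 1., n_bins + 1)
    pos_hist, _ = np.histogram(rng.beta(5, 2, size=500), bins=bins)
    neg_hist, _ = np.histogram(rng.beta(2, 5, size=500), bins=bins)
    test_hist, _ = np.histogram(rng.beta(3, 3, size=250), bins=bins)
    stan_code = load_stan_file()
    prev_samples = pq_stan(stan_code, n_bins, pos_hist, neg_hist, test_hist,
                           number_of_samples=1000, num_warmup=500, stan_seed=0)
    return prev_samples.mean(), np.percentile(prev_samples, [2.5, 97.5])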
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,11 @@ class KDEBase:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
BANDWIDTH_METHOD = ['scott', 'silverman']
|
BANDWIDTH_METHOD = ['scott', 'silverman']
|
||||||
|
KERNELS = ['gaussian', 'aitchison', 'ilr']
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _check_bandwidth(cls, bandwidth):
|
def _check_bandwidth(cls, bandwidth, kernel):
|
||||||
"""
|
"""
|
||||||
Checks that the bandwidth parameter is correct
|
Checks that the bandwidth parameter is correct
|
||||||
|
|
||||||
|
|
@ -27,32 +29,56 @@ class KDEBase:
|
||||||
assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
|
assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
|
||||||
f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
|
f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
|
||||||
if isinstance(bandwidth, float):
|
if isinstance(bandwidth, float):
|
||||||
assert 0 < bandwidth < 1, \
|
assert kernel!='gaussian' or (0 < bandwidth < 1), \
|
||||||
"the bandwith for KDEy should be in (0,1), since this method models the unit simplex"
|
("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
|
||||||
|
"since this method models the unit simplex")
|
||||||
return bandwidth
|
return bandwidth
|
||||||
|
|
||||||
def get_kde_function(self, X, bandwidth):
|
@classmethod
|
||||||
|
def _check_kernel(cls, kernel):
|
||||||
|
"""
|
||||||
|
Checks that the kernel parameter is correct
|
||||||
|
|
||||||
|
:param kernel: str
|
||||||
|
:return: the validated kernel
|
||||||
|
"""
|
||||||
|
assert kernel in KDEBase.KERNELS, f'unknown {kernel=}'
|
||||||
|
return kernel
|
||||||
|
|
||||||
|
def get_kde_function(self, X, bandwidth, kernel):
|
||||||
"""
|
"""
|
||||||
Wraps the KDE function from scikit-learn.
|
Wraps the KDE function from scikit-learn.
|
||||||
|
|
||||||
:param X: data for which the density function is to be estimated
|
:param X: data for which the density function is to be estimated
|
||||||
:param bandwidth: the bandwidth of the kernel
|
:param bandwidth: the bandwidth of the kernel
|
||||||
|
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
|
||||||
:return: a scikit-learn's KernelDensity object
|
:return: a scikit-learn's KernelDensity object
|
||||||
"""
|
"""
|
||||||
|
if kernel == 'aitchison':
|
||||||
|
X = self.clr_transform(X)
|
||||||
|
elif kernel == 'ilr':
|
||||||
|
X = self.ilr_transform(X)
|
||||||
|
|
||||||
return KernelDensity(bandwidth=bandwidth).fit(X)
|
return KernelDensity(bandwidth=bandwidth).fit(X)
|
||||||
|
|
||||||
def pdf(self, kde, X):
|
def pdf(self, kde, X, kernel):
|
||||||
"""
|
"""
|
||||||
Wraps the density evaluation of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this
|
||||||
function returns :math:`e^{s}`
|
function returns :math:`e^{s}`
|
||||||
|
|
||||||
:param kde: a previously fit KDE function
|
:param kde: a previously fit KDE function
|
||||||
:param X: the data for which the density is to be estimated
|
:param X: the data for which the density is to be estimated
|
||||||
|
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
|
||||||
:return: np.ndarray with the densities
|
:return: np.ndarray with the densities
|
||||||
"""
|
"""
|
||||||
|
if kernel == 'aitchison':
|
||||||
|
X = self.clr_transform(X)
|
||||||
|
elif kernel == 'ilr':
|
||||||
|
X = self.ilr_transform(X)
|
||||||
|
|
||||||
return np.exp(kde.score_samples(X))
|
return np.exp(kde.score_samples(X))
|
||||||
|
|
||||||
def get_mixture_components(self, X, y, classes, bandwidth):
|
def get_mixture_components(self, X, y, classes, bandwidth, kernel):
|
||||||
"""
|
"""
|
||||||
Returns an array containing the mixture components, i.e., the KDE functions for each class.
|
Returns an array containing the mixture components, i.e., the KDE functions for each class.
|
||||||
|
|
||||||
|
|
@ -60,6 +86,7 @@ class KDEBase:
|
||||||
:param y: the class labels
|
:param y: the class labels
|
||||||
:param n_classes: integer, the number of classes
|
:param n_classes: integer, the number of classes
|
||||||
:param bandwidth: float, the bandwidth of the kernel
|
:param bandwidth: float, the bandwidth of the kernel
|
||||||
|
:param kernel: the kernel (valid ones are in KDEBase.KERNELS)
|
||||||
:return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates
|
:return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates
|
||||||
"""
|
"""
|
||||||
class_cond_X = []
|
class_cond_X = []
|
||||||
|
|
@ -67,8 +94,19 @@ class KDEBase:
|
||||||
selX = X[y==cat]
|
selX = X[y==cat]
|
||||||
if selX.size==0:
|
if selX.size==0:
|
||||||
selX = [F.uniform_prevalence(len(classes))]
|
selX = [F.uniform_prevalence(len(classes))]
|
||||||
|
|
||||||
class_cond_X.append(selX)
|
class_cond_X.append(selX)
|
||||||
return [self.get_kde_function(X_cond_yi, bandwidth) for X_cond_yi in class_cond_X]
|
return [self.get_kde_function(X_cond_yi, bandwidth, kernel) for X_cond_yi in class_cond_X]
|
||||||
|
|
||||||
|
def clr_transform(self, X):
|
||||||
|
if not hasattr(self, 'clr'):
|
||||||
|
self.clr = F.CLRtransformation()
|
||||||
|
return self.clr(X)
|
||||||
|
|
||||||
|
def ilr_transform(self, X):
|
||||||
|
if not hasattr(self, 'ilr'):
|
||||||
|
self.ilr = F.ILRtransformation()
|
||||||
|
return self.ilr(X)
|
||||||
|
|
||||||
|
|
||||||
class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
||||||
|
|
@ -107,17 +145,19 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
||||||
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
|
are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
|
||||||
for `k`); or as a tuple (X,y) defining the specific set of data to use for validation.
|
for `k`); or as a tuple (X,y) defining the specific set of data to use for validation.
|
||||||
:param bandwidth: float, the bandwidth of the Kernel
|
:param bandwidth: float, the bandwidth of the Kernel
|
||||||
|
:param kernel: kernel of KDE, valid ones are in KDEBase.KERNELS
|
||||||
:param random_state: a seed to be set before fitting any base quantifier (default None)
|
:param random_state: a seed to be set before fitting any base quantifier (default None)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1,
|
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1, kernel='gaussian',
|
||||||
random_state=None):
|
random_state=None):
|
||||||
super().__init__(classifier, fit_classifier, val_split)
|
super().__init__(classifier, fit_classifier, val_split)
|
||||||
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
|
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
|
||||||
|
self.kernel = self._check_kernel(kernel)
|
||||||
self.random_state=random_state
|
self.random_state=random_state
|
||||||
|
|
||||||
def aggregation_fit(self, classif_predictions, labels):
|
def aggregation_fit(self, classif_predictions, labels):
|
||||||
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth)
|
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def aggregate(self, posteriors: np.ndarray):
|
def aggregate(self, posteriors: np.ndarray):
|
||||||
|
|
@ -131,10 +171,11 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
||||||
with qp.util.temp_seed(self.random_state):
|
with qp.util.temp_seed(self.random_state):
|
||||||
epsilon = 1e-10
|
epsilon = 1e-10
|
||||||
n_classes = len(self.mix_densities)
|
n_classes = len(self.mix_densities)
|
||||||
test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
|
test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
|
||||||
|
|
||||||
def neg_loglikelihood(prev):
|
def neg_loglikelihood(prev):
|
||||||
test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
|
# test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
|
||||||
|
test_mixture_likelihood = prev @ test_densities
|
||||||
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
||||||
return -np.sum(test_loglikelihood)
|
return -np.sum(test_loglikelihood)
|
||||||
|
|
||||||
|
|
@ -191,7 +232,7 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
|
||||||
|
|
||||||
super().__init__(classifier, fit_classifier, val_split)
|
super().__init__(classifier, fit_classifier, val_split)
|
||||||
self.divergence = divergence
|
self.divergence = divergence
|
||||||
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
|
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
|
||||||
self.random_state=random_state
|
self.random_state=random_state
|
||||||
self.montecarlo_trials = montecarlo_trials
|
self.montecarlo_trials = montecarlo_trials
|
||||||
|
|
||||||
|
|
@ -278,7 +319,7 @@ class KDEyCS(AggregativeSoftQuantifier):
|
||||||
|
|
||||||
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1):
|
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=0.1):
|
||||||
super().__init__(classifier, fit_classifier, val_split)
|
super().__init__(classifier, fit_classifier, val_split)
|
||||||
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
|
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel='gaussian')
|
||||||
|
|
||||||
def gram_matrix_mix_sum(self, X, Y=None):
|
def gram_matrix_mix_sum(self, X, Y=None):
|
||||||
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
|
# this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
|
||||||
|
|
|
||||||
|
|
@ -673,7 +673,7 @@ class PACC(AggregativeSoftQuantifier):
|
||||||
class EMQ(AggregativeSoftQuantifier):
|
class EMQ(AggregativeSoftQuantifier):
|
||||||
"""
|
"""
|
||||||
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
|
`Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),
|
||||||
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm.
|
aka `Saerens-Latinne-Decaestecker` (SLD) algorithm, or `Maximum Likelihood Label Shift` (MLLS).
|
||||||
EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior
|
EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior
|
||||||
probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
|
probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via
|
||||||
maximum-likelihood estimation, in a mutually recursive way, until convergence.
|
maximum-likelihood estimation, in a mutually recursive way, until convergence.
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,21 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from joblib import Parallel, delayed
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
from sklearn.metrics import confusion_matrix
|
from sklearn.metrics import confusion_matrix
|
||||||
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
|
from quapy.functional import CompositionalTransformation, CLRtransformation, ILRtransformation
|
||||||
from quapy.method import _bayesian
|
from quapy.method import _bayesian
|
||||||
from quapy.method.aggregative import AggregativeCrispQuantifier
|
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.method.aggregative import AggregativeQuantifier
|
from quapy.method.aggregative import AggregativeQuantifier, AggregativeCrispQuantifier, AggregativeSoftQuantifier, BinaryAggregativeQuantifier
|
||||||
from scipy.stats import chi2
|
from scipy.stats import chi2
|
||||||
from sklearn.utils import resample
|
from sklearn.utils import resample
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from scipy.special import softmax, factorial
|
from scipy.special import factorial
|
||||||
import copy
|
import copy
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This module provides implementation of different types of confidence regions, and the implementation of Bootstrap
|
This module provides implementation of different types of confidence regions, and the implementation of Bootstrap
|
||||||
|
|
@ -80,6 +82,58 @@ class ConfidenceRegionABC(ABC):
|
||||||
proportion = np.clip(self.coverage(uniform_simplex), 0., 1.)
|
proportion = np.clip(self.coverage(uniform_simplex), 0., 1.)
|
||||||
return proportion
|
return proportion
|
||||||
|
|
||||||
|
@property
|
||||||
|
@abstractmethod
|
||||||
|
def samples(self):
|
||||||
|
""" Returns internal samples """
|
||||||
|
...
|
||||||
|
|
||||||
|
def __contains__(self, p):
|
||||||
|
"""
|
||||||
|
Overloads in operator, checks if `p` is contained in the region
|
||||||
|
|
||||||
|
:param p: array-like
|
||||||
|
:return: boolean
|
||||||
|
"""
|
||||||
|
p = np.asarray(p)
|
||||||
|
assert p.ndim==1, f'unexpected shape for point parameter'
|
||||||
|
return self.coverage(p)==1.
|
||||||
|
|
||||||
|
def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
|
||||||
|
"""
|
||||||
|
Finds the closest point to p that belongs to the region. Assumes the region is convex.
|
||||||
|
|
||||||
|
:param p: array-like, the point
|
||||||
|
:param tol: float, error tolerance
|
||||||
|
:param max_iter: int, max number of iterations
|
||||||
|
:returns: array-like, the closest point to p on the segment between p and the center of the region that
|
||||||
|
belongs to the region
|
||||||
|
"""
|
||||||
|
p = np.asarray(p, dtype=float)
|
||||||
|
|
||||||
|
# if p in region, returns p itself
|
||||||
|
if p in self:
|
||||||
|
return p.copy()
|
||||||
|
|
||||||
|
# center of the region
|
||||||
|
c = self.point_estimate()
|
||||||
|
|
||||||
|
# binary search in [0,1], interpolation parameter
|
||||||
|
# low=closest to p, high=closest to c
|
||||||
|
low, high = 0.0, 1.0
|
||||||
|
for _ in range(max_iter):
|
||||||
|
mid = 0.5 * (low + high)
|
||||||
|
x = p*(1-mid) + c*mid
|
||||||
|
if x in self:
|
||||||
|
high = mid
|
||||||
|
else:
|
||||||
|
low = mid
|
||||||
|
if high - low < tol:
|
||||||
|
break
|
||||||
|
|
||||||
|
in_boundary = p*(1-high) + c*high
|
||||||
|
return in_boundary
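# A small sketch (not part of the module; the Dirichlet parameters and the helper name are
# illustrative assumptions) of the bisection above: starting from a point outside the region, the
# search contracts towards the center until it crosses the boundary.
def _closest_point_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    samples = rng.dirichlet([30, 70], size=500)  # hypothetical bootstrap estimates around (0.3, 0.7)
    region = ConfidenceIntervals(samples, confidence_level=0.95)
    p_outside = np.asarray([0.9, 0.1])           # presumably far outside the intervals
    q = region.closest_point_in_region(p_outside)
    return (p_outside in region), (q in region)  # expected: (False, True)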
|
||||||
|
|
||||||
|
|
||||||
class WithConfidenceABC(ABC):
|
class WithConfidenceABC(ABC):
|
||||||
"""
|
"""
|
||||||
|
|
@ -88,20 +142,32 @@ class WithConfidenceABC(ABC):
|
||||||
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
|
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def quantify_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
|
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
Adds the method `quantify_conf` to the interface. This method returns not only the point-estimate, but
|
Adds the method `predict_conf` to the interface. This method returns not only the point-estimate, but
|
||||||
also the confidence region around it.
|
also the confidence region around it.
|
||||||
|
|
||||||
:param instances: a np.ndarray of shape (n_instances, n_features,)
|
:param instances: a np.ndarray of shape (n_instances, n_features,)
|
||||||
:confidence_level: float in (0, 1)
|
:param confidence_level: float in (0, 1), default is 0.95
|
||||||
:return: a tuple (`point_estimate`, `conf_region`), where `point_estimate` is a np.ndarray of shape
|
:return: a tuple (`point_estimate`, `conf_region`), where `point_estimate` is a np.ndarray of shape
|
||||||
(n_classes,) and `conf_region` is an object from :class:`ConfidenceRegionABC`
|
(n_classes,) and `conf_region` is an object from :class:`ConfidenceRegionABC`
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
def quantify_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
|
||||||
|
"""
|
||||||
|
Alias to `predict_conf`. This method returns not only the point-estimate, but
|
||||||
|
also the confidence region around it.
|
||||||
|
|
||||||
|
:param instances: a np.ndarray of shape (n_instances, n_features,)
|
||||||
|
:param confidence_level: float in (0, 1), default is 0.95
|
||||||
|
:return: a tuple (`point_estimate`, `conf_region`), where `point_estimate` is a np.ndarray of shape
|
||||||
|
(n_classes,) and `conf_region` is an object from :class:`ConfidenceRegionABC`
|
||||||
|
"""
|
||||||
|
return self.predict_conf(instances=instances, confidence_level=confidence_level)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals'):
|
def construct_region(cls, prev_estims, confidence_level=0.95, method='intervals')->ConfidenceRegionABC:
|
||||||
"""
|
"""
|
||||||
Construct a confidence region given many prevalence estimations.
|
Construct a confidence region given many prevalence estimations.
|
||||||
|
|
||||||
|
|
@ -136,7 +202,7 @@ def simplex_volume(n):
|
||||||
return 1 / factorial(n)
|
return 1 / factorial(n)
|
||||||
|
|
||||||
|
|
||||||
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
def within_ellipse_prop__(values, mean, prec_matrix, chi2_critical):
|
||||||
"""
|
"""
|
||||||
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
||||||
at a distance `chi2_critical`.
|
at a distance `chi2_critical`.
|
||||||
|
|
@ -169,110 +235,125 @@ def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
||||||
return within_elipse * 1.0
|
return within_elipse * 1.0
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
def within_ellipse_prop(values, mean, prec_matrix, chi2_critical):
|
||||||
"""
|
"""
|
||||||
Instantiates a Confidence Ellipse in the probability simplex.
|
Checks the proportion of values that belong to the ellipse with center `mean` and precision matrix `prec_matrix`
|
||||||
|
at a distance `chi2_critical`.
|
||||||
|
|
||||||
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
:param values: a np.ndarray of shape (n_dim,) or (n_values, n_dim,)
|
||||||
:param confidence_level: float, the confidence level (default 0.95)
|
:param mean: a np.ndarray of shape (n_dim,) with the center of the ellipse
|
||||||
|
:param prec_matrix: a np.ndarray with the precision matrix (inverse of the
|
||||||
|
covariance matrix) of the ellipse. If this inverse cannot be computed
|
||||||
|
then None must be passed
|
||||||
|
:param chi2_critical: float, the chi2 critical value
|
||||||
|
|
||||||
|
:return: float in [0,1], the fraction of values that are contained in the ellipse
|
||||||
|
defined by the mean (center), the precision matrix (shape), and the chi2_critical value (distance).
|
||||||
|
If `values` is only one value, then either 0. (not contained) or 1. (contained) is returned.
|
||||||
|
"""
|
||||||
|
if prec_matrix is None:
|
||||||
|
return 0.
|
||||||
|
|
||||||
|
values = np.atleast_2d(values)
|
||||||
|
diff = values - mean
|
||||||
|
d_M_squared = np.sum(diff @ prec_matrix * diff, axis=-1)
|
||||||
|
within_ellipse = d_M_squared <= chi2_critical
|
||||||
|
|
||||||
|
if len(within_ellipse) == 1:
|
||||||
|
return float(within_ellipse[0])
|
||||||
|
else:
|
||||||
|
return float(np.mean(within_ellipse))
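# A sanity-check sketch (not part of the module; the toy Gaussian is an illustrative assumption):
# points drawn from the generating distribution should fall inside the 95% ellipse roughly 95% of
# the time, since their squared Mahalanobis distance follows a chi-square with 2 degrees of freedom.
def _within_ellipse_sketch():
    import numpy as np
    from scipy.stats import chi2
    rng = np.random.default_rng(0)
    mean = np.asarray([0., 0.])
    cov = np.asarray([[1.0, 0.3], [0.3, 0.5]])
    prec = np.linalg.inv(cov)
    chi2_critical = chi2.ppf(0.95, df=2)
    values = rng.multivariate_normal(mean, cov, size=10_000)
    return within_ellipse_prop(values, mean, prec, chi2_critical)  # expected to be close to 0.95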
|
||||||
|
|
||||||
|
|
||||||
|
def closest_point_on_ellipsoid(p, mean, cov, chi2_critical, tol=1e-9, max_iter=100):
|
||||||
|
"""
|
||||||
|
Computes the closest point on the ellipsoid defined by:
|
||||||
|
(x - mean)^T cov^{-1} (x - mean) = chi2_critical
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, confidence_level=0.95):
|
p = np.asarray(p)
|
||||||
|
mean = np.asarray(mean)
|
||||||
|
Sigma = np.asarray(cov)
|
||||||
|
|
||||||
assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
|
# Precompute precision matrix
|
||||||
|
P = np.linalg.pinv(Sigma)
|
||||||
|
d = P.shape[0]
|
||||||
|
|
||||||
X = np.asarray(X)
|
# Define v = p - mean
|
||||||
|
v = p - mean
|
||||||
|
|
||||||
self.mean_ = X.mean(axis=0)
|
# If p is inside the ellipsoid, return p itself
|
||||||
self.cov_ = np.cov(X, rowvar=False, ddof=1)
|
M_dist = v @ P @ v
|
||||||
|
if M_dist <= chi2_critical:
|
||||||
|
return p.copy()
|
||||||
|
|
||||||
try:
|
# Function to compute x(lambda)
|
||||||
self.precision_matrix_ = np.linalg.inv(self.cov_)
|
def x_lambda(lmbda):
|
||||||
except:
|
A = np.eye(d) + lmbda * P
|
||||||
self.precision_matrix_ = None
|
return mean + np.linalg.solve(A, v)
|
||||||
|
|
||||||
self.dim = X.shape[-1]
|
# Function whose root we want: f(lambda) = Mahalanobis distance - chi2
|
||||||
self.ddof = self.dim - 1
|
def f(lmbda):
|
||||||
|
x = x_lambda(lmbda)
|
||||||
|
diff = x - mean
|
||||||
|
return diff @ P @ diff - chi2_critical
|
||||||
|
|
||||||
# critical chi-square value
|
# Bisection search over lambda >= 0
|
||||||
self.confidence_level = confidence_level
|
l_low, l_high = 0.0, 1.0
|
||||||
self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
|
|
||||||
|
|
||||||
def point_estimate(self):
|
# Increase high until f(high) < 0
|
||||||
"""
|
while f(l_high) > 0:
|
||||||
Returns the point estimate, the center of the ellipse.
|
l_high *= 2
|
||||||
|
if l_high > 1e12:
|
||||||
|
raise RuntimeError("Failed to bracket the root.")
|
||||||
|
|
||||||
:return: np.ndarray of shape (n_classes,)
|
# Bisection
|
||||||
"""
|
for _ in range(max_iter):
|
||||||
return self.mean_
|
l_mid = 0.5 * (l_low + l_high)
|
||||||
|
fm = f(l_mid)
|
||||||
|
if abs(fm) < tol:
|
||||||
|
break
|
||||||
|
if fm > 0:
|
||||||
|
l_low = l_mid
|
||||||
|
else:
|
||||||
|
l_high = l_mid
|
||||||
|
|
||||||
def coverage(self, true_value):
|
l_opt = l_mid
|
||||||
"""
|
return x_lambda(l_opt)
|
||||||
Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
|
|
||||||
fraction of these that are contained in the region, if more than one value is passed. If only one value is
|
|
||||||
passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
|
|
||||||
|
|
||||||
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
|
||||||
:return: float in [0,1]
|
|
||||||
"""
|
|
||||||
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
|
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceEllipseCLR(ConfidenceRegionABC):
|
|
||||||
"""
|
|
||||||
Instantiates a Confidence Ellipse in the Centered-Log Ratio (CLR) space.
|
|
||||||
|
|
||||||
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
|
||||||
:param confidence_level: float, the confidence level (default 0.95)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, X, confidence_level=0.95):
|
|
||||||
self.clr = CLRtransformation()
|
|
||||||
Z = self.clr(X)
|
|
||||||
self.mean_ = np.mean(X, axis=0)
|
|
||||||
self.conf_region_clr = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
|
||||||
|
|
||||||
def point_estimate(self):
|
|
||||||
"""
|
|
||||||
Returns the point estimate, the center of the ellipse.
|
|
||||||
|
|
||||||
:return: np.ndarray of shape (n_classes,)
|
|
||||||
"""
|
|
||||||
# The inverse of the CLR does not coincide with the true mean, because the geometric mean
|
|
||||||
# requires smoothing the prevalence vectors and this affects the softmax (inverse);
|
|
||||||
# return self.clr.inverse(self.mean_) # <- does not coincide
|
|
||||||
return self.mean_
|
|
||||||
|
|
||||||
def coverage(self, true_value):
|
|
||||||
"""
|
|
||||||
Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
|
|
||||||
fraction of these that are contained in the region, if more than one value is passed. If only one value is
|
|
||||||
passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
|
|
||||||
|
|
||||||
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
|
||||||
:return: float in [0,1]
|
|
||||||
"""
|
|
||||||
transformed_values = self.clr(true_value)
|
|
||||||
return self.conf_region_clr.coverage(transformed_values)
|
|
||||||
|
|
||||||
|
|
||||||
class ConfidenceIntervals(ConfidenceRegionABC):
|
class ConfidenceIntervals(ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
Instantiates a region based on (independent) Confidence Intervals.
|
Instantiates a region based on (independent) Confidence Intervals.
|
||||||
|
|
||||||
:param X: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||||
:param confidence_level: float, the confidence level (default 0.95)
|
:param confidence_level: float, the confidence level (default 0.95)
|
||||||
|
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
|
||||||
|
is applied to the significance level (`alpha`) before computing confidence intervals.
|
||||||
|
The correction consists of replacing `alpha` with `alpha/n_classes`. When
|
||||||
|
`n_classes=2` the correction is not applied because there is only one verification test
|
||||||
|
since the other class is constrained. This is not necessarily true for n_classes>2.
|
||||||
"""
|
"""
|
||||||
def __init__(self, X, confidence_level=0.95):
|
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
|
||||||
assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
|
assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
|
||||||
|
|
||||||
X = np.asarray(X)
|
samples = np.asarray(samples)
|
||||||
|
|
||||||
self.means_ = X.mean(axis=0)
|
self.means_ = samples.mean(axis=0)
|
||||||
alpha = 1-confidence_level
|
alpha = 1-confidence_level
|
||||||
|
if bonferroni_correction:
|
||||||
|
n_classes = samples.shape[-1]
|
||||||
|
if n_classes>2:
|
||||||
|
alpha = alpha/n_classes
|
||||||
low_perc = (alpha/2.)*100
|
low_perc = (alpha/2.)*100
|
||||||
high_perc = (1-alpha/2.)*100
|
high_perc = (1-alpha/2.)*100
|
||||||
self.I_low, self.I_high = np.percentile(X, q=[low_perc, high_perc], axis=0)
|
self.I_low, self.I_high = np.percentile(samples, q=[low_perc, high_perc], axis=0)
|
||||||
|
self._samples = samples
|
||||||
|
self.alpha = alpha
|
||||||
|
|
||||||
|
@property
|
||||||
|
def samples(self):
|
||||||
|
return self._samples
|
||||||
|
|
||||||
def point_estimate(self):
|
def point_estimate(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -297,33 +378,174 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
||||||
|
|
||||||
return proportion
|
return proportion
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
|
||||||
|
|
||||||
class CLRtransformation:
|
@property
|
||||||
"""
|
def n_dim(self):
|
||||||
Centered log-ratio, from component analysis
|
return len(self.I_low)
|
||||||
"""
|
|
||||||
def __call__(self, X, epsilon=1e-6):
|
|
||||||
"""
|
|
||||||
Applies the CLR function to X thus mapping the instances, which are contained in `\\mathcal{R}^{n}` but
|
|
||||||
actually lie on a `\\mathcal{R}^{n-1}` simplex, onto an unrestricted space in :math:`\\mathcal{R}^{n}`
|
|
||||||
|
|
||||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
def winkler_scores(self, true_prev):
|
||||||
:param epsilon: small float for prevalence smoothing
|
true_prev = np.asarray(true_prev)
|
||||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
|
||||||
"""
|
assert len(true_prev)==self.n_dim, \
|
||||||
X = np.asarray(X)
|
f'unexpected number of dimensions; found {len(true_prev)}, expected {self.n_dim}'
|
||||||
X = qp.error.smooth(X, epsilon)
|
|
||||||
G = np.exp(np.mean(np.log(X), axis=-1, keepdims=True)) # geometric mean
|
|
||||||
return np.log(X / G)
|
|
||||||
|
|
||||||
def inverse(self, X):
|
def winkler_score(low, high, true_val, alpha):
|
||||||
"""
|
amp = high-low
|
||||||
Inverse function. However, clr.inverse(clr(X)) does not exactly coincide with X due to smoothing.
|
scale_cost = 1./alpha
|
||||||
|
cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
|
||||||
|
return amp + scale_cost*cost
|
||||||
|
|
||||||
:param X: np.ndarray of (n_instances, n_dimensions) to be transformed
|
return np.asarray(
|
||||||
:return: np.ndarray of (n_instances, n_dimensions), the CLR-transformed points
|
[winkler_score(low_i, high_i, true_v, self.alpha)
|
||||||
|
for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
|
||||||
|
)
|
||||||
|
|
||||||
|
def mean_winkler_score(self, true_prev):
|
||||||
|
return np.mean(self.winkler_scores(true_prev))
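# A toy sketch (not part of the module; the Dirichlet parameters are illustrative assumptions) of
# the Winkler score: when the true prevalence falls inside the intervals the score reduces to the
# interval widths, otherwise the violation of the closest bound is penalized by 1/alpha.
def _winkler_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    samples = rng.dirichlet([40, 60], size=1000)  # hypothetical bootstrap estimates
    region = ConfidenceIntervals(samples, confidence_level=0.95)
    covered = region.mean_winkler_score(region.point_estimate())  # widths only
    missed = region.mean_winkler_score(np.asarray([0.9, 0.1]))    # widths plus penalty
    return covered < missed  # expected: True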
|
||||||
|
|
||||||
|
|
||||||
|
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||||
"""
|
"""
|
||||||
return softmax(X, axis=-1)
|
Instantiates a Confidence Ellipse in the probability simplex.
|
||||||
|
|
||||||
|
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||||
|
:param confidence_level: float, the confidence level (default 0.95)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, samples, confidence_level=0.95):
|
||||||
|
|
||||||
|
assert 0. < confidence_level < 1., f'{confidence_level=} must be in range(0,1)'
|
||||||
|
|
||||||
|
samples = np.asarray(samples)
|
||||||
|
|
||||||
|
self.mean_ = samples.mean(axis=0)
|
||||||
|
self.cov_ = np.cov(samples, rowvar=False, ddof=1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.precision_matrix_ = np.linalg.pinv(self.cov_)
|
||||||
|
except:
|
||||||
|
self.precision_matrix_ = None
|
||||||
|
|
||||||
|
self.dim = samples.shape[-1]
|
||||||
|
self.ddof = self.dim - 1
|
||||||
|
|
||||||
|
# critical chi-square value
|
||||||
|
self.confidence_level = confidence_level
|
||||||
|
self.chi2_critical_ = chi2.ppf(confidence_level, df=self.ddof)
|
||||||
|
self._samples = samples
|
||||||
|
self.alpha = 1.-confidence_level
|
||||||
|
|
||||||
|
@property
|
||||||
|
def samples(self):
|
||||||
|
return self._samples
|
||||||
|
|
||||||
|
def point_estimate(self):
|
||||||
|
"""
|
||||||
|
Returns the point estimate, the center of the ellipse.
|
||||||
|
|
||||||
|
:return: np.ndarray of shape (n_classes,)
|
||||||
|
"""
|
||||||
|
return self.mean_
|
||||||
|
|
||||||
|
def coverage(self, true_value):
|
||||||
|
"""
|
||||||
|
Checks whether a value, or a set of values, is contained in the confidence region. The method computes the
|
||||||
|
fraction of these that are contained in the region, if more than one value is passed. If only one value is
|
||||||
|
passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
|
||||||
|
|
||||||
|
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
||||||
|
:return: float in [0,1]
|
||||||
|
"""
|
||||||
|
return within_ellipse_prop(true_value, self.mean_, self.precision_matrix_, self.chi2_critical_)
|
||||||
|
|
||||||
|
def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
|
||||||
|
return closest_point_on_ellipsoid(
|
||||||
|
p,
|
||||||
|
mean=self.mean_,
|
||||||
|
cov=self.cov_,
|
||||||
|
chi2_critical=self.chi2_critical_
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfidenceEllipseTransformed(ConfidenceRegionABC):
    """
    Instantiates a Confidence Ellipse in a transformed space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param transformation: a :class:`CompositionalTransformation` mapping the probability simplex onto an
        unconstrained space
    :param confidence_level: float, the confidence level (default 0.95)
    """

    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95):
        samples = np.asarray(samples)
        self.transformation = transformation
        Z = self.transformation(samples)
        # self.mean_ = np.mean(samples, axis=0)
        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
        self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
        self._samples = samples
        self.alpha = 1.-confidence_level

    @property
    def samples(self):
        return self._samples

    def point_estimate(self):
        """
        Returns the point estimate, i.e., the center of the ellipse.

        :return: np.ndarray of shape (n_classes,)
        """
        # The inverse of the CLR does not coincide with the true mean, because the geometric mean
        # requires smoothing the prevalence vectors and this affects the softmax (inverse);
        # return self.clr.inverse(self.mean_)  # <- does not coincide
        return self.mean_

    def coverage(self, true_value):
        """
        Checks whether a value, or a set of values, is contained in the confidence region. If more than one value
        is passed, the method returns the fraction of these that are contained in the region; if a single value is
        passed, it returns 1.0 or 0.0, indicating whether the value is in the region or not, respectively.

        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes)
        :return: float in [0,1]
        """
        transformed_values = self.transformation(true_value)
        return self.conf_region_z.coverage(transformed_values)

    def closest_point_in_region(self, p, tol=1e-6, max_iter=30):
        p_prime = self.transformation(p)
        b_prime = self.conf_region_z.closest_point_in_region(p_prime, tol=tol, max_iter=max_iter)
        b = self.transformation.inverse(b_prime)
        return b
class ConfidenceEllipseCLR(ConfidenceEllipseTransformed):
    """
    Instantiates a Confidence Ellipse in the Centered Log-Ratio (CLR) space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param confidence_level: float, the confidence level (default 0.95)
    """
    def __init__(self, samples, confidence_level=0.95):
        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level)


class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
    """
    Instantiates a Confidence Ellipse in the Isometric Log-Ratio (ILR) space.

    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
    :param confidence_level: float, the confidence level (default 0.95)
    """
    def __init__(self, samples, confidence_level=0.95):
        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level)
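The two subclasses differ only in the transformation they apply. A short sketch (synthetic samples again, purely illustrative) that builds the three kinds of ellipses on the same bootstrap samples and reports the fraction of those samples each region covers:

import numpy as np

rng = np.random.default_rng(1)
samples = rng.dirichlet(alpha=[5, 10, 85], size=300)   # hypothetical, fairly skewed samples

for region in (ConfidenceEllipseSimplex(samples),
               ConfidenceEllipseCLR(samples),
               ConfidenceEllipseILR(samples)):
    print(type(region).__name__, region.coverage(samples))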
class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):

@ -339,6 +561,12 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):

    During inference, the bootstrap repetitions are applied to the pre-classified test instances.

    See
    `Moreo, A., Salvati, N.
    An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.
    Learning to Quantify: Methods and Applications (LQ 2025), co-located at ECML-PKDD 2025.
    pp 12-33 <https://lq-2025.github.io/proceedings/CompleteVolume.pdf>`_

    :param quantifier: an aggregative quantifier
    :param n_train_samples: int, the number of training resamplings (default 1; set to a value > 1 to activate a
        model-based bootstrap approach)
@ -357,7 +585,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                 n_test_samples=500,
                 confidence_level=0.95,
                 region='intervals',
                 random_state=None,
                 verbose=False):

        assert isinstance(quantifier, AggregativeQuantifier), \
            f'base quantifier does not seem to be an instance of {AggregativeQuantifier.__name__}'

@ -374,6 +603,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
        self.confidence_level = confidence_level
        self.region = region
        self.random_state = random_state
        self.verbose = verbose

    def aggregation_fit(self, classif_predictions, labels):
        data = LabelledCollection(classif_predictions, labels, classes=self.classes_)

@ -399,6 +629,24 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
        prev_mean, self.confidence = self.aggregate_conf(classif_predictions)
        return prev_mean

    def aggregate_conf_sequential__(self, classif_predictions: np.ndarray, confidence_level=None):
        # sequential (non-parallel) variant of aggregate_conf
        if confidence_level is None:
            confidence_level = self.confidence_level

        n_samples = classif_predictions.shape[0]
        prevs = []
        with qp.util.temp_seed(self.random_state):
            for quantifier in self.quantifiers:
                for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
                    sample_i = resample(classif_predictions, n_samples=n_samples)
                    prev_i = quantifier.aggregate(sample_i)
                    prevs.append(prev_i)

        conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
        prev_estim = conf.point_estimate()

        return prev_estim, conf
    def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
        if confidence_level is None:
            confidence_level = self.confidence_level

@ -407,10 +655,15 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
        prevs = []
        with qp.util.temp_seed(self.random_state):
            for quantifier in self.quantifiers:
                # bootstrap repetitions are now run in parallel (uses joblib's Parallel/delayed)
                results = Parallel(n_jobs=-1)(
                    delayed(bootstrap_once)(i, classif_predictions, quantifier, n_samples)
                    for i in range(self.n_test_samples)
                )
                prevs.extend(results)
                # previous sequential version:
                # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
                #     sample_i = resample(classif_predictions, n_samples=n_samples)
                #     prev_i = quantifier.aggregate(sample_i)
                #     prevs.append(prev_i)

        conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
        prev_estim = conf.point_estimate()

@ -423,7 +676,7 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
        self.aggregation_fit(classif_predictions, labels)
        return self

    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
        # renamed from quantify_conf
        predictions = self.quantifier.classify(instances)
        return self.aggregate_conf(predictions, confidence_level=confidence_level)

@ -435,9 +688,16 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
        return self.quantifier._classifier_method()


def bootstrap_once(i, classif_predictions, quantifier, n_samples):
    # one bootstrap repetition: resample the pre-classified test instances and aggregate
    idx = np.random.randint(0, len(classif_predictions), n_samples)
    sample = classif_predictions[idx]
    prev = quantifier.aggregate(sample)
    return prev
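A minimal usage sketch of the bootstrap wrapper, assuming the fit(X, y)/predict_conf(X) interface used throughout this diff; X, y, X_test and true_prev are placeholders for user-provided data, not part of the library:

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

quantifier = AggregativeBootstrap(PACC(LogisticRegression()),
                                  n_test_samples=500,
                                  confidence_level=0.95,
                                  region='intervals')
quantifier.fit(X, y)                                           # X, y: training covariates and labels (assumed)
prev_estimate, conf_region = quantifier.predict_conf(X_test)   # X_test: test instances (assumed)
print(prev_estimate)                                           # point estimate of the class prevalences
print(conf_region.coverage(true_prev))                         # 1.0/0.0 depending on whether true_prev lies in the region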
class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
    """
    `Bayesian quantification <https://arxiv.org/abs/2302.09159>`_ method (by Albert Ziegler and Paweł Czyż),
    which is a variant of :class:`ACC` that calculates the posterior probability distribution
    over the prevalence vectors, rather than providing a point estimate obtained
    by matrix inversion.

@ -543,9 +803,115 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
        samples = self.sample_from_posterior(classif_predictions)[_bayesian.P_TEST_Y]
        return np.asarray(samples.mean(axis=0), dtype=float)

    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
        # renamed from quantify_conf; now honours a per-call confidence_level
        if confidence_level is None:
            confidence_level = self.confidence_level
        classif_predictions = self.classify(instances)
        point_estimate = self.aggregate(classif_predictions)
        samples = self.get_prevalence_samples()  # available after calling the "aggregate" function
        region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
        return point_estimate, region
class PQ(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
    """
    `Precise Quantifier: Bayesian distribution matching quantifier <https://arxiv.org/abs/2507.06061>`_,
    which is a variant of :class:`HDy` that calculates the posterior probability distribution
    over the prevalence vectors, rather than providing a point estimate.

    This method relies on extra dependencies, which have to be installed via:
    `$ pip install quapy[bayes]`

    :param classifier: a scikit-learn's BaseEstimator, or None, in which case the classifier is taken to be
        the one indicated in `qp.environ['DEFAULT_CLS']`
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set; or as an integer (default 5), indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`); or as a tuple `(X,y)` defining the specific set of data to use for validation. Set to
        None when the method does not require any validation data, in order to avoid that some portion of
        the training data be wasted.
    :param nbins: int, number of histogram bins (default 4)
    :param fixed_bins: bool, whether to use equal-width bins in [0,1] (True) or quantile-based bins (False, default)
    :param num_warmup: number of warmup iterations for the STAN sampler (default 500)
    :param num_samples: number of samples to draw from the posterior (default 1000)
    :param stan_seed: random seed for the STAN sampler (default 0)
    :param confidence_level: float in [0,1] to construct a confidence region around the point estimate (default 0.95)
    :param region: string, set to `intervals` for constructing confidence intervals (default), or to
        `ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
        constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
    """
    def __init__(self,
                 classifier: BaseEstimator=None,
                 fit_classifier=True,
                 val_split: int = 5,
                 nbins: int = 4,
                 fixed_bins: bool = False,
                 num_warmup: int = 500,
                 num_samples: int = 1_000,
                 stan_seed: int = 0,
                 confidence_level: float = 0.95,
                 region: str = 'intervals'):

        if num_warmup <= 0:
            raise ValueError(f'parameter {num_warmup=} must be a positive integer')
        if num_samples <= 0:
            raise ValueError(f'parameter {num_samples=} must be a positive integer')

        if not _bayesian.DEPENDENCIES_INSTALLED:
            raise ImportError("Auxiliary dependencies are required. "
                              "Run `$ pip install quapy[bayes]` to install them.")

        super().__init__(classifier, fit_classifier, val_split)

        self.nbins = nbins
        self.fixed_bins = fixed_bins
        self.num_warmup = num_warmup
        self.num_samples = num_samples
        self.stan_seed = stan_seed
        self.stan_code = _bayesian.load_stan_file()
        self.confidence_level = confidence_level
        self.region = region

    def aggregation_fit(self, classif_predictions, labels):
        y_pred = classif_predictions[:, self.pos_label]

        # Compute bin limits
        if self.fixed_bins:
            # Uniform bins in [0,1]
            self.bin_limits = np.linspace(0, 1, self.nbins + 1)
        else:
            # Quantile bins
            self.bin_limits = np.quantile(y_pred, np.linspace(0, 1, self.nbins + 1))

        # Assign each prediction to a bin
        bin_indices = np.digitize(y_pred, self.bin_limits[1:-1], right=True)

        # Positive and negative masks
        pos_mask = (labels == self.pos_label)
        neg_mask = ~pos_mask

        # Count positives and negatives per bin
        self.pos_hist = np.bincount(bin_indices[pos_mask], minlength=self.nbins)
        self.neg_hist = np.bincount(bin_indices[neg_mask], minlength=self.nbins)

    def aggregate(self, classif_predictions):
        Px_test = classif_predictions[:, self.pos_label]
        test_hist, _ = np.histogram(Px_test, bins=self.bin_limits)
        prevs = _bayesian.pq_stan(
            self.stan_code, self.nbins, self.pos_hist, self.neg_hist, test_hist,
            self.num_samples, self.num_warmup, self.stan_seed
        ).flatten()
        self.prev_distribution = np.vstack([1-prevs, prevs]).T
        return self.prev_distribution.mean(axis=0)

    def aggregate_conf(self, predictions, confidence_level=None):
        if confidence_level is None:
            confidence_level = self.confidence_level
        point_estimate = self.aggregate(predictions)
        samples = self.prev_distribution
        region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
        return point_estimate, region

    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
        predictions = self.classify(instances)
        return self.aggregate_conf(predictions, confidence_level=confidence_level)
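A minimal usage sketch for PQ, assuming the extra dependencies are installed and the fit(X, y)/predict_conf(X) interface of this diff; X, y and X_test are placeholders for user-provided (binary-labelled) data:

from sklearn.linear_model import LogisticRegression

pq = PQ(LogisticRegression(), nbins=4, num_warmup=500, num_samples=1000, region='intervals')
pq.fit(X, y)                                             # X, y: binary training data (assumed)
point_estimate, conf_region = pq.predict_conf(X_test)    # X_test: test instances (assumed)
print(point_estimate)                                    # [prevalence(negative), prevalence(positive)]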
@ -1,11 +1,17 @@
from itertools import product

from tqdm import tqdm
from typing import Union, Callable, Counter
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.utils import resample
from sklearn.preprocessing import normalize

from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
from quapy.functional import get_divergence
from quapy.method.base import BaseQuantifier, BinaryQuantifier
import quapy.functional as F
from scipy.optimize import lsq_linear
from scipy import sparse
# removed: `from typing import Union, Callable` (superseded) and `from quapy.data import LabelledCollection`


class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):

@ -149,53 +155,164 @@ class DMx(BaseQuantifier):
        return F.argmin_prevalence(loss, n_classes, method=self.search)

# (the previous ReadMe placeholder, a fully commented-out prototype, is replaced by the implementation below)
class ReadMe(BaseQuantifier, WithConfidenceABC):
    """
    ReadMe is a non-aggregative quantification system proposed by
    `Daniel Hopkins and Gary King, 2007. A method of automated nonparametric content analysis for
    social science. American Journal of Political Science, 54(1):229-247.
    <https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1540-5907.2009.00428.x>`_.
    The idea is to estimate `Q(Y=i)` directly from:

    :math:`Q(X)=\\sum_{i=1}^{n} Q(X|Y=i) Q(Y=i)`

    via least-squares regression, i.e., without incurring the cost of computing posterior probabilities.
    However, the vector `Q(X)` and the matrix `Q(X|Y=i)` in this representation can be of very high
    dimensionality. In order to render the problem tractable, ReadMe performs bagging in the feature space.
    ReadMe also combines bagging with bootstrap in order to derive confidence intervals around the point
    estimates.

    We use the same default parameters as in the official
    `R implementation <https://github.com/iqss-research/ReadMeV1/blob/master/R/prototype.R>`_.

    :param prob_model: str ('naive', or 'full'), selects the modality in which the probabilities `Q(X)` and
        `Q(X|Y)` are to be modelled. Options include "full", which corresponds to the original formulation of
        ReadMe, in which X is constrained to be a binary matrix (e.g., of term presence/absence) and in which
        `Q(X)` and `Q(X|Y)` are modelled, respectively, as matrices of `(2^K, 1)` and `(2^K, n)` values, where
        `K` is the number of columns in the data matrix (i.e., `bagging_range`), and `n` is the number of classes.
        Of course, this approach is computationally prohibitive for large `K`, so the computation is restricted to
        data matrices with `K<=25` (although we recommend even smaller values of `K`). A much faster model is
        "naive", which models `Q(X)` and `Q(X|Y)` as multinomial distributions under the `bag-of-words`
        perspective. In this case, `bagging_range` can be set to much larger values. Default is "full"
        (i.e., original ReadMe behavior).
    :param bootstrap_trials: int, number of bootstrap trials (default 300)
    :param bagging_trials: int, number of bagging trials (default 300)
    :param bagging_range: int, number of features to keep for each bagging trial (default 15)
    :param confidence_level: float, a value in (0,1) reflecting the desired confidence level (default 0.95)
    :param region: str in 'intervals', 'ellipse', 'ellipse-clr'; indicates the preferred method for
        defining the confidence region (see :class:`WithConfidenceABC`)
    :param random_state: int or None, allows replicability (default None)
    :param verbose: bool, whether to display information during the process (default False)
    """

    MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION = 25
    PROBABILISTIC_MODELS = ["naive", "full"]

    def __init__(self,
                 prob_model="full",
                 bootstrap_trials=300,
                 bagging_trials=300,
                 bagging_range=15,
                 confidence_level=0.95,
                 region='intervals',
                 random_state=None,
                 verbose=False):
        assert prob_model in ReadMe.PROBABILISTIC_MODELS, \
            f'unknown {prob_model=}, valid ones are {ReadMe.PROBABILISTIC_MODELS=}'
        self.prob_model = prob_model
        self.bootstrap_trials = bootstrap_trials
        self.bagging_trials = bagging_trials
        self.bagging_range = bagging_range
        self.confidence_level = confidence_level
        self.region = region
        self.random_state = random_state
        self.verbose = verbose

    def fit(self, X, y):
        self._check_matrix(X)

        self.rng = np.random.default_rng(self.random_state)
        self.classes_ = np.unique(y)

        Xsize = X.shape[0]

        # Bootstrap loop
        self.Xboots, self.yboots = [], []
        for _ in range(self.bootstrap_trials):
            idx = self.rng.choice(Xsize, size=Xsize, replace=True)
            self.Xboots.append(X[idx])
            self.yboots.append(y[idx])

        return self

    def predict_conf(self, X, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
        self._check_matrix(X)

        n_features = X.shape[1]
        boots_prevalences = []
        for Xboots, yboots in tqdm(
                zip(self.Xboots, self.yboots),
                desc='bootstrap predictions', total=self.bootstrap_trials, disable=not self.verbose
        ):
            bagging_estimates = []
            for _ in range(self.bagging_trials):
                feat_idx = self.rng.choice(n_features, size=self.bagging_range, replace=False)
                Xboots_bagging = Xboots[:, feat_idx]
                X_boots_bagging = X[:, feat_idx]
                bagging_prev = self._quantify_iteration(Xboots_bagging, yboots, X_boots_bagging)
                bagging_estimates.append(bagging_prev)

            boots_prevalences.append(np.mean(bagging_estimates, axis=0))

        conf = WithConfidenceABC.construct_region(boots_prevalences, confidence_level, method=self.region)
        prev_estim = conf.point_estimate()

        return prev_estim, conf

    def predict(self, X):
        prev_estim, _ = self.predict_conf(X)
        return prev_estim

    def _quantify_iteration(self, Xtr, ytr, Xte):
        """Single ReadMe estimate."""
        PX_given_Y = np.asarray([self._compute_P(Xtr[ytr == c]) for c in self.classes_])
        PX = self._compute_P(Xte)

        res = lsq_linear(A=PX_given_Y.T, b=PX, bounds=(0, 1))
        pY = np.maximum(res.x, 0)
        return pY / pY.sum()

    def _check_matrix(self, X):
        """the "full" model requires estimating empirical distributions; due to the high computational cost,
        this function is only made available for binary matrices"""
        if self.prob_model == 'full' and not self._is_binary_matrix(X):
            raise ValueError('the empirical distribution can only be computed efficiently on binary matrices')

    def _is_binary_matrix(self, X):
        data = X.data if sparse.issparse(X) else X
        return np.all((data == 0) | (data == 1))

    def _compute_P(self, X):
        if self.prob_model == 'naive':
            return self._multinomial_distribution(X)
        elif self.prob_model == 'full':
            return self._empirical_distribution(X)
        else:
            raise ValueError(f'unknown {self.prob_model}; valid ones are {ReadMe.PROBABILISTIC_MODELS=}')

    def _empirical_distribution(self, X):
        if X.shape[1] > self.MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION:
            raise ValueError(f'the empirical distribution can only be computed efficiently for dimensions '
                             f'less or equal than {self.MAX_FEATURES_FOR_EMPIRICAL_ESTIMATION}')

        # we convert every binary row (e.g., 0 0 1 0 1) into the equivalent number (e.g., 5)
        K = X.shape[1]
        binary_powers = 1 << np.arange(K-1, -1, -1)  # (2^(K-1), ..., 8, 4, 2, 1)
        X_as_binary_numbers = X @ binary_powers

        # count occurrences and compute probs
        counts = np.bincount(X_as_binary_numbers, minlength=2 ** K).astype(float)
        probs = counts / counts.sum()
        return probs

    def _multinomial_distribution(self, X):
        PX = np.asarray(X.sum(axis=0))
        PX = normalize(PX, norm='l1', axis=1)
        return PX.ravel()
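A minimal usage sketch for the "full" model, which requires a binary document-term matrix; docs_train, y and docs_test are placeholders for user-provided data, not part of the library:

from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(binary=True)
X = vectorizer.fit_transform(docs_train)        # docs_train, y: labelled training documents (assumed)
X_test = vectorizer.transform(docs_test)        # docs_test: unlabelled test documents (assumed)

readme = ReadMe(prob_model='full', bagging_range=15, bootstrap_trials=300, bagging_trials=300,
                region='intervals', random_state=0, verbose=True)
readme.fit(X, y)
prev_estimate, conf_region = readme.predict_conf(X_test, confidence_level=0.95)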
def _get_features_range(X):
@ -0,0 +1,39 @@
data {
  int<lower=0> n_bucket;
  array[n_bucket] int<lower=0> train_pos;
  array[n_bucket] int<lower=0> train_neg;
  array[n_bucket] int<lower=0> test;
  int<lower=0,upper=1> posterior;
}

transformed data{
  row_vector<lower=0>[n_bucket] train_pos_rv;
  row_vector<lower=0>[n_bucket] train_neg_rv;
  row_vector<lower=0>[n_bucket] test_rv;
  real n_test;

  train_pos_rv = to_row_vector( train_pos );
  train_neg_rv = to_row_vector( train_neg );
  test_rv = to_row_vector( test );
  n_test = sum( test );
}

parameters {
  simplex[n_bucket] p_neg;
  simplex[n_bucket] p_pos;
  real<lower=0,upper=1> prev_prior;
}

model {
  if( posterior ) {
    target += train_neg_rv * log( p_neg );
    target += train_pos_rv * log( p_pos );
    target += test_rv * log( p_neg * ( 1 - prev_prior) + p_pos * prev_prior );
  }
}

generated quantities {
  real<lower=0,upper=1> prev;
  prev = sum( binomial_rng(test, 1 / ( 1 + (p_neg./p_pos) *(1-prev_prior)/prev_prior ) ) ) / n_test;
}
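For intuition, the model block above accumulates a log-likelihood in which the test histogram is explained as a mixture of the positive and negative per-bin distributions. A rough NumPy sketch of that (unnormalized) log-likelihood, with illustrative names only, is:

import numpy as np

def pq_log_likelihood(p_pos, p_neg, prev, train_pos, train_neg, test):
    # p_pos, p_neg: per-bin distributions (each sums to 1); prev: prevalence of the positive class
    # train_pos, train_neg, test: histograms of counts per bin
    mix = (1 - prev) * p_neg + prev * p_pos
    return (np.dot(train_neg, np.log(p_neg)) +
            np.dot(train_pos, np.log(p_pos)) +
            np.dot(test, np.log(mix)))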
@ -410,7 +410,7 @@ def group_params(param_grid: dict):
    """
    classifier_params, quantifier_params = {}, {}
    for key, values in param_grid.items():
        if 'classifier__' in key or key == 'val_split':   # was: key.startswith('classifier__')
            classifier_params[key] = values
        else:
            quantifier_params[key] = values
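Presumably the substring test (rather than startswith) is meant to also catch classifier hyperparameters that appear under a nested prefix, e.g., when the classifier sits inside a wrapped quantifier. A small illustrative call, with hypothetical parameter names:

param_grid = {
    'classifier__C': [1, 10],              # contains 'classifier__'  -> classifier_params
    'quantifier__classifier__C': [1, 10],  # nested prefix: caught by the substring test, missed by startswith
    'val_split': [5],                      # explicitly routed to classifier_params
    'bandwidth': [0.05, 0.1],              # anything else            -> quantifier_params
}
classifier_params, quantifier_params = group_params(param_grid)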
@ -80,7 +80,7 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
        train_prev = train_prev[pos_class]
        ax.scatter(train_prev, train_prev, c='c', label='tr-prev', linewidth=2, edgecolor='k', s=100, zorder=3)

    ax.set(xlabel='true frequency', ylabel='estimated frequency', title=title)  # was: 'true prevalence' / 'estimated prevalence'
    ax.set_ylim(0, 1)
    ax.set_xlim(0, 1)
@ -241,6 +241,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
                   title=None,
                   vlines=None,
                   method_order=None,
                   fontsize=18,
                   savepath=None):
    """
    Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift

@ -276,6 +277,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
    :return: returns (fig, ax) matplotlib objects for eventual customisation
    """

    plt.rcParams['font.size'] = fontsize

    fig, ax = plt.subplots()
    ax.grid()

@ -311,7 +314,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
        for p,ind in enumerate(range(len(bins))):
            selected = inds==ind
            if selected.sum() > 0:
                xs.append(ind*binwidth)  # was: ind*binwidth-binwidth/2
                ys.append(np.mean(method_drifts[selected]))
                ystds.append(np.std(method_drifts[selected]))
                npoints[p] += len(method_drifts[selected])

@ -320,6 +323,9 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
        ys = np.asarray(ys)
        ystds = np.asarray(ystds)

        # if ys[-1]<ys[-2]:
        #     ys[-1] = ys[-2]+(abs(ys[-2]-ys[-3]))/2

        min_x_method, max_x_method, min_y_method, max_y_method = xs.min(), xs.max(), ys.min(), ys.max()
        min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
        max_x = max_x_method if max_x is None or max_x_method > max_x else max_x

@ -342,8 +348,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
        ax2.spines['right'].set_color('g')
        ax2.tick_params(axis='y', colors='g')

    ax.set(xlabel=f'Amount of label shift',   # was: 'Prior shift between training set and test sample'
           ylabel=f'Absolute error',          # was: f'{error_name.upper()} (true prev, predicted prev)'
           title=title)
    # box = ax.get_position()
    # ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

@ -357,9 +363,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
        ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))

    if show_legend:
        ax.legend(loc='center right', bbox_to_anchor=(1.31, 0.5))
        # previously: fig.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)
        # fig.legend(loc='lower center',
        #            bbox_to_anchor=(1, 0.5),
        #            ncol=(len(method_names)+1)//2)

    _save_or_show(savepath)
8  setup.py

@ -111,6 +111,12 @@ setup(
    #
    packages=find_packages(include=['quapy', 'quapy.*']),  # Required

    package_data={
        # For the 'quapy.method' package, include all files
        # in the 'stan' subdirectory that end with .stan
        'quapy.method': ['stan/*.stan']
    },

    python_requires='>=3.8, <4',

    install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention', 'ucimlrepo', 'certifi'],

@ -124,7 +130,7 @@ setup(
    # Similar to `install_requires` above, these must be valid existing
    # projects.
    extras_require={  # Optional
        'bayes': ['jax', 'jaxlib', 'numpyro', 'pystan'],  # pystan added (used by the Stan-based PQ method)
        'neural': ['torch'],
        'tests': ['certifi'],
        'docs' : ['sphinx-rtd-theme', 'myst-parser'],