mapls working

2026-01-15 09:41:37 +01:00 · 2026-01-15 09:41:37 +01:00 · 9ae65ab09a
parent a511f577c9
commit 9ae65ab09a
6 changed files with 431 additions and 79 deletions
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@ -1,10 +1,7 @@
 from functools import lru_cache
 from numpy.ma.core import shape
 from sklearn.base import BaseEstimator
 import numpy as np
-import quapy.util
+from BayesianKDEy.commons import ILRtransformation, in_simplex
 from quapy.method._kdey import KDEBase
 from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
 from quapy.functional import CLRtransformation
@ -95,7 +92,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
        return self
-    def aggregate(self, classif_predictions):
+    def aggregate(self, classif_predictions: np.ndarray):
        if self.engine == 'rw-mh':
            if self.prior != 'uniform':
                raise RuntimeError('prior is not yet implemented in rw-mh')
@ -105,6 +102,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                raise RuntimeError('prior is not yet implemented in emcee')
            self.prevalence_samples = self._bayesian_emcee(classif_predictions)
        elif self.engine == 'numpyro':
            self.ilr = ILRtransformation(jax_mode=True)
            self.prevalence_samples = self._bayesian_numpyro(classif_predictions)
        return self.prevalence_samples.mean(axis=0)
@ -247,8 +245,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
            [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
        )
        # move to jax
        test_densities = jnp.array(test_densities)
        n_classes = X_probs.shape[-1]
        if isinstance(self.prior, str) and self.prior == 'uniform':
            alpha = [1.] * n_classes
@ -270,8 +266,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        samples_z = mcmc.get_samples()["z"]
        # back to simplex
-        ilr = ILRtransformation(jax_mode=True)
+        samples_prev = np.asarray(self.ilr.inverse(np.asarray(samples_z)))
        samples_prev = np.asarray(ilr.inverse(np.asarray(samples_z)))
        return samples_prev
@ -280,7 +275,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        test_densities: shape (n_classes, n_instances,)
        """
        n_classes = test_densities.shape[0]
        ilr = ILRtransformation(jax_mode=True)
        # sample in unconstrained R^(n_classes-1)
        z = numpyro.sample(
@ -288,7 +282,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
            dist.Normal(0.0, 1.0).expand([n_classes - 1])
        )
-        prev = ilr.inverse(z)  # simplex, shape (n_classes,)
+        prev = self.ilr.inverse(z)  # simplex, shape (n_classes,)
        # prior
        if alpha is not None:
@ -299,66 +293,10 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        # if alpha is None, then this corresponds to a weak logistic-normal prior
        # likelihood
        test_densities = jnp.array(test_densities)
        likelihoods = jnp.dot(prev, test_densities)
        numpyro.factor(
            "loglik", (1.0 / self.temperature) * jnp.sum(jnp.log(likelihoods + 1e-10))
        )
 def in_simplex(x):
    return np.all(x >= 0) and np.isclose(x.sum(), 1)
 class ILRtransformation(F.CompositionalTransformation):
    def __init__(self, jax_mode=False):
        self.jax_mode = jax_mode
    def array(self, X):
        if self.jax_mode:
            return jnp.array(X)
        else:
            return np.asarray(X)
    def __call__(self, X):
        X = self.array(X)
        X = quapy.error.smooth(X, self.EPSILON)
        k = X.shape[-1]
        V = self.array(self.get_V(k))
        logp = jnp.log(X) if self.jax_mode else np.log(X)
        return logp @ V.T
    def inverse(self, Z):
        Z = self.array(Z)
        k_minus_1 = Z.shape[-1]
        k = k_minus_1 + 1
        V = self.array(self.get_V(k))
        logp = Z @ V
        p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
        p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
        return p
    @lru_cache(maxsize=None)
    def get_V(self, k):
        def helmert_matrix(k):
            """
            Returns the (k x k) Helmert matrix.
            """
            H = np.zeros((k, k))
            for i in range(1, k):
                H[i, :i] = 1
                H[i, i] = -(i)
                H[i] = H[i] / np.sqrt(i * (i + 1))
            # row 0 stays zeros; will be discarded
            return H
        def ilr_basis(k):
            """
            Constructs an orthonormal ILR basis using the Helmert submatrix.
            Output shape: (k-1, k)
            """
            H = helmert_matrix(k)
            V = H[1:, :]  # remove first row of zeros
            return V
        return ilr_basis(k)
--- a/BayesianKDEy/_bayesian_mapls.py
+++ b/BayesianKDEy/_bayesian_mapls.py
@ -0,0 +1,327 @@
 import jax.numpy as jnp
 import numpy as np
 import numpyro
 import numpyro.distributions as dist
 from numpyro.infer import MCMC, NUTS, HMC
 import jax.random as random
 from sklearn.base import BaseEstimator
 from jax.scipy.special import logsumexp
 from BayesianKDEy.commons import ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
 import quapy.functional as F
 class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
    """
    :param classifier:
    :param fit_classifier:
    :param val_split:
    :param exact_train_prev: set to True (default) for using the true training prevalence as the initial
        observation; set to False for computing the training prevalence as an estimate of it, i.e., as the
        expected value of the posterior probabilities of the training instances.
    :param num_samples:
    :param mcmc_seed:
    :param confidence_level:
    :param region:
    """
    def __init__(self,
                 classifier: BaseEstimator = None,
                 fit_classifier=True,
                 val_split: int = 5,
                 exact_train_prev=True,
                 num_warmup: int = 500,
                 num_samples: int = 1_000,
                 mcmc_seed: int = 0,
                 confidence_level: float = 0.95,
                 region: str = 'intervals',
                 temperature=1.,
                 prior='uniform',
                 verbose=False
                 ):
        if num_samples <= 0:
            raise ValueError(f'parameter {num_samples=} must be a positive integer')
        super().__init__(classifier, fit_classifier, val_split)
        self.exact_train_prev = exact_train_prev
        self.num_warmup = num_warmup
        self.num_samples = num_samples
        self.mcmc_seed = mcmc_seed
        self.confidence_level = confidence_level
        self.region = region
        self.temperature = temperature
        self.prior = prior
        self.verbose = verbose
    def aggregation_fit(self, classif_predictions, labels):
        self.train_post = classif_predictions
        if self.exact_train_prev:
            self.train_prevalence = F.prevalence_from_labels(labels, classes=self.classes_)
        else:
            self.train_prevalence = F.prevalence_from_probabilities(classif_predictions)
        self.ilr = ILRtransformation(jax_mode=True)
        return self
    def aggregate(self, classif_predictions: np.ndarray):
        n_test, n_classes = classif_predictions.shape
        pi_star, lam = mapls(
            self.train_post,
            test_probs=classif_predictions,
            pz=self.train_prevalence,
            return_lambda=True
        )
        # pi_star: MAP in simplex (shape: [K]), convert to ILR space
        z0 = self.ilr(pi_star)
        if self.prior == 'uniform':
            alpha = [1.] * n_classes
        elif self.prior == 'map':
            alpha_0 = alpha0_from_lamda(lam, n_test=n_test, n_classes=n_classes)
            alpha = [alpha_0] * n_classes
        elif self.prior == 'map2':
            lam2 = get_lamda(
                test_probs=classif_predictions,
                pz=self.train_prevalence,
                q_prior=pi_star,
                dvg=kl_div
            )
            alpha_0 = alpha0_from_lamda(lam2, n_test=n_test, n_classes=n_classes)
            alpha = [alpha_0] * n_classes
        else:
            alpha = self.prior
        kernel = NUTS(self.model)
        mcmc = MCMC(
            kernel,
            num_warmup=self.num_warmup,
            num_samples=self.num_samples,
            num_chains=1,
            progress_bar=self.verbose
        )
        mcmc.run(
            random.PRNGKey(self.mcmc_seed),
            test_posteriors=classif_predictions,
            alpha=alpha,
            init_params={"z": z0}
        )
        samples = mcmc.get_samples()["z"]
        self.prevalence_samples = self.ilr.inverse(samples)
        return self.prevalence_samples.mean(axis=0)
    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
        if confidence_level is None:
            confidence_level = self.confidence_level
        classif_predictions = self.classify(instances)
        point_estimate = self.aggregate(classif_predictions)
        samples = self.prevalence_samples  # available after calling "aggregate" function
        region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
        return point_estimate, region
    def log_likelihood(self, test_classif, test_prev, train_prev):
        # n_test = test_classif.shape[0]
        log_w = jnp.log(test_prev) - jnp.log(train_prev)
        # return (1/n_test) * jnp.sum(
        #     logsumexp(jnp.log(test_classif) + log_w, axis=-1)
        # )
        return jnp.sum(
            logsumexp(jnp.log(test_classif) + log_w, axis=-1)
        )
    def model(self, test_posteriors, alpha):
        test_posteriors = jnp.array(test_posteriors)
        n_classes = test_posteriors.shape[1]
        # prior in ILR
        z = numpyro.sample(
            "z",
            dist.Normal(jnp.zeros(n_classes-1), 1.0)
        )
        # back to simplex
        prev = self.ilr.inverse(z)
        train_prev = jnp.array(self.train_prevalence)
        # prior
        alpha = jnp.array(alpha)
        numpyro.factor(
            "dirichlet_prior",
            dist.Dirichlet(alpha).log_prob(prev)
        )
        # Likelihood
        numpyro.factor(
            "likelihood",
            (1.0 / self.temperature) * self.log_likelihood(test_posteriors, test_prev=prev, train_prev=train_prev)
        )
 # adapted from https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
 def mapls(train_probs: np.ndarray,
          test_probs: np.ndarray,
          pz: np.ndarray,
          qy_mode: str = 'soft',
          max_iter: int = 100,
          init_mode: str = 'identical',
          lam: float = None,
          dvg_name='kl',
          return_lambda=False
          ):
    r"""
    Implementation of Maximum A Posteriori Label Shift,
    for Unknown target label distribution estimation
    Given source domain P(Y_s=i|X_s=x) = f(x) and P(Y_s=i),
    estimate targe domain P(Y_t=i) on test set
    """
    # Sanity Check
    cls_num = len(pz)
    assert test_probs.shape[-1] == cls_num
    if type(max_iter) != int or max_iter < 0:
        raise Exception('max_iter should be a positive integer, not ' + str(max_iter))
    # Setup d(p,q) measure
    if dvg_name == 'kl':
        dvg = kl_div
    elif dvg_name == 'js':
        dvg = js_div
    else:
        raise Exception('Unsupported distribution distance measure, expect kl or js.')
    # Set Prior of Target Label Distribution
    q_prior = np.ones(cls_num) / cls_num
    # q_prior = pz.copy()
    # Lambda estimation-------------------------------------------------------#
    if lam is None:
        # logging.info('Data shape: %s, %s' % (str(train_probs.shape), str(test_probs.shape)))
        # logging.info('Divergence type is %s' % (dvg))
        lam = get_lamda(test_probs, pz, q_prior, dvg=dvg, max_iter=max_iter)
        # logging.info('Estimated lambda value is %.4f' % lam)
    # else:
    #     logging.info('Assigned lambda is %.4f' % lam)
    # EM Algorithm Computation
    qz = mapls_EM(test_probs, pz, lam, q_prior, cls_num,
                  init_mode=init_mode, max_iter=max_iter, qy_mode=qy_mode)
    if return_lambda:
        return qz, lam
    else:
        return qz
 def mapls_EM(probs, pz, lam, q_prior, cls_num, init_mode='identical', max_iter=100, qy_mode='soft'):
    # Normalize Source Label Distribution pz
    pz = np.array(pz) / np.sum(pz)
    # Initialize Target Label Distribution qz
    if init_mode == 'uniform':
        qz = np.ones(cls_num) / cls_num
    elif init_mode == 'identical':
        qz = pz.copy()
    else:
        raise ValueError('init_mode should be either "uniform" or "identical"')
    # Initialize w
    w = (np.array(qz) / np.array(pz))
    # EM algorithm with MAP estimation----------------------------------------#
    for i in range(max_iter):
        # print('w shape ', w.shape)
        # E-Step--------------------------------------------------------------#
        mapls_probs = normalized(probs * w, axis=-1, order=1)
        # M-Step--------------------------------------------------------------#
        if qy_mode == 'hard':
            pred = np.argmax(mapls_probs, axis=-1)
            qz_new = np.bincount(pred.reshape(-1), minlength=cls_num)
        elif qy_mode == 'soft':
            qz_new = np.mean(mapls_probs, axis=0)
        # elif qy_mode == 'topk':
        #     qz_new = Topk_qy(mapls_probs, cls_num, topk_ratio=0.9, head=0)
        else:
            raise Exception('mapls mode should be either "soft" or "hard". ')
        # print(np.shape(pc_probs), np.shape(pred), np.shape(cls_num_list_t))
        # Update w with MAP estimation of Target Label Distribution qz
        # qz = (qz_new + alpha) / (N + np.sum(alpha))
        qz = lam * qz_new + (1 - lam) * q_prior
        qz /= qz.sum()
        w = qz / pz
    return qz
 def get_lamda(test_probs, pz, q_prior, dvg, max_iter=50):
    K = len(pz)
    # MLLS estimation of source and target domain label distribution
    qz_pred = mapls_EM(test_probs, pz, 1, 0, K, max_iter=max_iter)
    TU_div = dvg(qz_pred, q_prior)
    TS_div = dvg(qz_pred, pz)
    SU_div = dvg(pz, q_prior)
    # logging.info('weights are, TU_div %.4f, TS_div %.4f, SU_div %.4f' % (TU_div, TS_div, SU_div))
    SU_conf = 1 - lam_forward(SU_div, lam_inv(dpq=0.5, lam=0.2))
    TU_conf = lam_forward(TU_div, lam_inv(dpq=0.5, lam=SU_conf))
    TS_conf = lam_forward(TS_div, lam_inv(dpq=0.5, lam=SU_conf))
    # logging.info('weights are, unviform_weight %.4f, differ_weight %.4f, regularize weight %.4f'
    #              % (TU_conf, TS_conf, SU_conf))
    confs = np.array([TU_conf, 1 - TS_conf])
    w = np.array([0.9, 0.1])
    lam = np.sum(w * confs)
    # logging.info('Estimated lambda is: %.4f', lam)
    return lam
 def lam_inv(dpq, lam):
    return (1 / (1 - lam) - 1) / dpq
 def lam_forward(dpq, gamma):
    return gamma * dpq / (1 + gamma * dpq)
 # def kl_div(p, q):
 #     p = np.asarray(p, dtype=np.float32)
 #     q = np.asarray(q + 1e-8, dtype=np.float32)
 #
 #     return np.sum(np.where(p != 0, p * np.log(p / q), 0))
 def kl_div(p, q, eps=1e-12):
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    mask = p > 0
    return np.sum(p[mask] * np.log(p[mask] / (q[mask] + eps)))
 def js_div(p, q):
    assert (np.abs(np.sum(p) - 1) < 1e-6) and (np.abs(np.sum(q) - 1) < 1e-6)
    m = (p + q) / 2
    return kl_div(p, m) / 2 + kl_div(q, m) / 2
 def normalized(a, axis=-1, order=2):
    r"""
    Prediction Normalization
    """
    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
    l2[l2 == 0] = 1
    return a / np.expand_dims(l2, axis)
 def alpha0_from_lamda(lam, n_test, n_classes):
    return 1+n_test*(1-lam)/(lam*n_classes)
--- a/BayesianKDEy/commons.py
+++ b/BayesianKDEy/commons.py
@ -1,8 +1,13 @@
 import os
 from functools import lru_cache
 from pathlib import Path
 from jax import numpy as jnp
 from sklearn.base import BaseEstimator
 import error
 import functional as F
 import quapy as qp
 import numpy as np
@ -80,3 +85,61 @@ class KDEyILR(KDEyML):
            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
            random_state=random_state, kernel='ilr'
        )
 class ILRtransformation(F.CompositionalTransformation):
    def __init__(self, jax_mode=False):
        self.jax_mode = jax_mode
    def array(self, X):
        if self.jax_mode:
            return jnp.array(X)
        else:
            return np.asarray(X)
    def __call__(self, X):
        X = self.array(X)
        X = qp.error.smooth(X, self.EPSILON)
        k = X.shape[-1]
        V = self.array(self.get_V(k))
        logp = jnp.log(X) if self.jax_mode else np.log(X)
        return logp @ V.T
    def inverse(self, Z):
        Z = self.array(Z)
        k_minus_1 = Z.shape[-1]
        k = k_minus_1 + 1
        V = self.array(self.get_V(k))
        logp = Z @ V
        p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
        p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
        return p
    @lru_cache(maxsize=None)
    def get_V(self, k):
        def helmert_matrix(k):
            """
            Returns the (k x k) Helmert matrix.
            """
            H = np.zeros((k, k))
            for i in range(1, k):
                H[i, :i] = 1
                H[i, i] = -(i)
                H[i] = H[i] / np.sqrt(i * (i + 1))
            # row 0 stays zeros; will be discarded
            return H
        def ilr_basis(k):
            """
            Constructs an orthonormal ILR basis using the Helmert submatrix.
            Output shape: (k-1, k)
            """
            H = helmert_matrix(k)
            V = H[1:, :]  # remove first row of zeros
            return V
        return ilr_basis(k)
 def in_simplex(x):
    return np.all(x >= 0) and np.isclose(x.sum(), 1)
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@ -4,10 +4,11 @@ from sklearn.linear_model import LogisticRegression as LR
 from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
 from BayesianKDEy._bayesian_mapls import BayesianMAPLS
 from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR
 from BayesianKDEy.temperature_calibration import temp_calibration
 from build.lib.quapy.data import LabelledCollection
-from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
+from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
 from quapy.model_selection import GridSearchQ
 from quapy.data import Dataset
 # from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
@ -65,6 +66,20 @@ def methods():
    # yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
    # yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
    # yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
    # yield 'BayEMQ', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*2T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=None, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*2T01', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=0.1, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*2T10000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=10000, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*2T100000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=100000,
    #                                                                          exact_train_prev=True), multiclass_method
    # yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
    yield 'BayEMQ-U-Temp*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*Temp1', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*Temp10', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=10, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*Temp100', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=100, exact_train_prev=True), multiclass_method
    # yield 'BayEMQ*Temp1000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1000, exact_train_prev=True), multiclass_method
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@ -101,6 +116,7 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
        t_init = time()
        withconf_quantifier = withconf_constructor(best_hyperparams)
        if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
            print('calibrating temperature')
            train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
            temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
            withconf_quantifier.temperature = temperature
@ -111,7 +127,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
        train_prevalence = training.prevalence()
        results = defaultdict(list)
        test_generator = UPP(test, repeats=100, random_state=0)
-        for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
+        pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
        for i, (sample_X, true_prevalence) in pbar:
            t_init = time()
            point_estimate, region = withconf_quantifier.predict_conf(sample_X)
            ttime = time()-t_init
@ -125,6 +142,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
            results['test-time'].append(ttime)
            results['samples'].append(region.samples)
            pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
        report = {
            'optim_hyper': best_hyperparams,
            'train_time': tr_time,
@ -162,7 +181,7 @@ if __name__ == '__main__':
                )
                print(f'dataset={data_name}, '
                      f'method={method_name}: '
-                      f'mae={report["results"]["ae"].mean():.3f}, '
+                      f'mae={report["results"]["ae"].mean():.5f}, '
                      f'coverage={report["results"]["coverage"].mean():.5f}, '
                      f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@ -90,10 +90,15 @@ methods = ['BayesianACC', #'BayesianKDEy',
           # 'BaKDE-Ait-numpyro',
           # 'BaKDE-Ait-numpyro-T*',
           'BaKDE-Ait-numpyro-T*-U',
-           'BootstrapACC',
+           'BayEMQ-U-Temp1-2',
-           'BootstrapHDy',
+           'BayEMQ-U-Temp*',
-           'BootstrapKDEy',
+           # 'BayEMQ*2Temp1',
-           'BootstrapEMQ'
+           # 'BayEMQ*2Temp*'
           # 'BootstrapACC',
           # 'BootstrapHDy',
           # 'BootstrapKDEy',
           # 'BootstrapEMQ'
           ]
 def nicer(name:str):
@ -193,7 +198,7 @@ for setup in ['multiclass']:
        pv = pd.pivot_table(
            df, index='dataset', columns='method', values=[
                # f'amperr-{region}',
-                f'a-{region}',
+                # f'a-{region}',
                f'c-{region}',
                # f'w-{region}',
                'ae',
--- a/BayesianKDEy/prior_effect.py
+++ b/BayesianKDEy/prior_effect.py
@ -17,7 +17,7 @@ from full_experiments import model_selection
 from itertools import chain
-def select_imbalanced_datasets(top_m=5):
+def select_imbalanced_datasets(top_m=10):
    datasets_prevs = []
    # choose top-m imbalanced datasets
    for data_name in multiclass['datasets']:
@ -107,7 +107,7 @@ def experiment(dataset: Dataset,
            run_test(test, alpha_test_informative, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
            # informative prior
-            alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.5) * concentration
+            alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.25) * concentration
            prior_type = 'wrong'
            run_test(test, alpha_test_wrong, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
@ -267,7 +267,7 @@ def coverage_vs_amplitude_plot(df, save_path=None):
 if __name__ == '__main__':
    result_dir = Path('./results/prior_effect')
-    selected = select_imbalanced_datasets()
+    selected = select_imbalanced_datasets(10)
    print(f'selected datasets={selected}')
    qp.environ['SAMPLE_SIZE'] = multiclass['sample_size']
    reports = []