From 47dc6acc751eec085ab388c44ffd5307d8c9a97a Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Tue, 13 Jan 2026 13:05:42 +0100
Subject: [PATCH] bayesianCC with custom prior

---
 BayesianKDEy/_bayeisan_kdey.py |  4 +++-
 BayesianKDEy/prior_effect.py   |  4 ++--
 quapy/method/_bayesian.py      | 23 +++++++++++++----------
 quapy/method/confidence.py     | 21 +++++++++++++++++++--
 4 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 356b259..31c3d46 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -42,6 +42,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
     :param region: string, set to `intervals` for constructing confidence intervals (default), or to `ellipse`
         for constructing an ellipse in the probability simplex, or to `ellipse-clr` for constructing an ellipse
         in the Centered-Log Ratio (CLR) unconstrained space.
+    :param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
+        for a uniform, uninformative prior (default)
     :param verbose: bool, whether to display progress bar
     """
     def __init__(self,
@@ -249,7 +251,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         test_densities = jnp.array(test_densities)
         n_classes = X_probs.shape[-1]
         if isinstance(self.prior, str) and self.prior == 'uniform':
-            alpha = [1.]*n_classes
+            alpha = [1.] * n_classes
         else:
             alpha = self.prior
diff --git a/BayesianKDEy/prior_effect.py b/BayesianKDEy/prior_effect.py
index 07769a4..1a4b9e6 100644
--- a/BayesianKDEy/prior_effect.py
+++ b/BayesianKDEy/prior_effect.py
@@ -32,8 +32,8 @@ def methods():
     kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
     kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
 
-    #yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
-    yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison',
+    yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
+    # yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison',
                                                                                    mcmc_seed=0,
                                                                                    engine='numpyro',
                                                                                    temperature=None,
diff --git a/quapy/method/_bayesian.py b/quapy/method/_bayesian.py
index 6e75f65..cd0e8c2 100644
--- a/quapy/method/_bayesian.py
+++ b/quapy/method/_bayesian.py
@@ -33,7 +33,7 @@
 P_TEST_C: str = "P_test(C)"
 P_C_COND_Y: str = "P(C|Y)"
 
 
-def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -> None:
+def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None:
     """
     Defines a probabilistic model in `NumPyro `_.
@@ -47,7 +47,7 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -> None:
     K = len(n_c_unlabeled)
     L = len(n_y_labeled)
 
-    pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.ones(L)))
+    pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32)))
     p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K)))
 
     with numpyro.plate('plate', L):
@@ -57,13 +57,13 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -> None:
     numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
 
 
-
-def sample_posterior(
-    n_c_unlabeled: np.ndarray,
-    n_y_and_c_labeled: np.ndarray,
-    num_warmup: int,
-    num_samples: int,
-    seed: int = 0,
+def sample_posterior_bayesianCC(
+    n_c_unlabeled: np.ndarray,
+    n_y_and_c_labeled: np.ndarray,
+    num_warmup: int,
+    num_samples: int,
+    alpha: np.ndarray,
+    seed: int = 0,
 ) -> dict:
     """
     Samples from the Bayesian quantification model in NumPyro using the
@@ -75,9 +75,12 @@ def sample_posterior(
         with entry `(y, c)` being the number of instances labeled as class `y` and predicted as class `c`.
     :param num_warmup: the number of warmup steps.
     :param num_samples: the number of samples to draw.
+    :param alpha: a `np.ndarray` of shape `(n_classes,)` with the alpha parameters of the
+        Dirichlet prior
     :seed: the random seed.
     :return: a `dict` with the samples. The keys are the names of the latent variables.
     """
+
     mcmc = numpyro.infer.MCMC(
         numpyro.infer.NUTS(model_bayesianCC),
         num_warmup=num_warmup,
@@ -85,7 +88,7 @@ def sample_posterior(
         num_samples=num_samples,
         progress_bar=False
     )
     rng_key = jax.random.PRNGKey(seed)
-    mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled)
+    mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha)
     return mcmc.get_samples()
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index cc0673a..10b2245 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -1,3 +1,6 @@
+from numbers import Number
+from typing import Iterable
+
 import numpy as np
 from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
@@ -726,6 +729,8 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
     :param region: string, set to `intervals` for constructing confidence intervals (default), or to `ellipse`
         for constructing an ellipse in the probability simplex, or to `ellipse-clr` for constructing an ellipse
         in the Centered-Log Ratio (CLR) unconstrained space.
+    :param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
+        for a uniform, uninformative prior (default)
     """
     def __init__(self,
                  classifier: BaseEstimator=None,
@@ -735,12 +740,16 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
                  num_samples: int = 1_000,
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
-                 region: str = 'intervals'):
+                 region: str = 'intervals',
+                 prior = 'uniform'):
         if num_warmup <= 0:
             raise ValueError(f'parameter {num_warmup=} must be a positive integer')
         if num_samples <= 0:
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
+        assert ((isinstance(prior, str) and prior == 'uniform') or
+                (isinstance(prior, Iterable) and all(isinstance(v, Number) for v in prior))), \
+            f'wrong type for {prior=}; expected "uniform" or an array-like of real values'
         if _bayesian.DEPENDENCIES_INSTALLED is False:
             raise ImportError("Auxiliary dependencies are required. "
" @@ -752,6 +761,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC): self.mcmc_seed = mcmc_seed self.confidence_level = confidence_level self.region = region + self.prior = prior # Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances # labeled as class y and predicted as class c. @@ -782,11 +792,18 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC): n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float) - self._samples = _bayesian.sample_posterior( + n_classes = len(self.classifier.classes_) + if isinstance(self.prior, str) and self.prior == 'uniform': + alpha = np.asarray([1.] * n_classes) + else: + alpha = np.asarray(self.prior) + + self._samples = _bayesian.sample_posterior_bayesianCC( n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=self._n_and_c_labeled, num_warmup=self.num_warmup, num_samples=self.num_samples, + alpha=alpha, seed=self.mcmc_seed, ) return self._samples