bayesianCC with custom prior

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-13 13:05:42 +01:00
parent 724e1b13a0
commit 47dc6acc75
4 changed files with 37 additions and 15 deletions

View File

@ -42,6 +42,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
:param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
:param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
for a uniform, uninformative prior (default)
:param verbose: bool, whether to display progress bar
"""
def __init__(self,
@ -249,7 +251,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
test_densities = jnp.array(test_densities)
n_classes = X_probs.shape[-1]
if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = [1.]*n_classes
alpha = [1.] * n_classes
else:
alpha = self.prior

View File

@ -32,8 +32,8 @@ def methods():
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
#yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison',
yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
# yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison',
mcmc_seed=0,
engine='numpyro',
temperature=None,

View File

@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)"
P_C_COND_Y: str = "P(C|Y)"
def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -> None:
def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None:
"""
Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_.
@ -47,7 +47,7 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -
K = len(n_c_unlabeled)
L = len(n_y_labeled)
pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.ones(L)))
pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32)))
p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K)))
with numpyro.plate('plate', L):
@ -57,13 +57,13 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -
numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
def sample_posterior(
n_c_unlabeled: np.ndarray,
n_y_and_c_labeled: np.ndarray,
num_warmup: int,
num_samples: int,
seed: int = 0,
def sample_posterior_bayesianCC(
n_c_unlabeled: np.ndarray,
n_y_and_c_labeled: np.ndarray,
num_warmup: int,
num_samples: int,
alpha: np.ndarray,
seed: int = 0,
) -> dict:
"""
Samples from the Bayesian quantification model in NumPyro using the
@ -75,9 +75,12 @@ def sample_posterior(
with entry `(y, c)` being the number of instances labeled as class `y` and predicted as class `c`.
:param num_warmup: the number of warmup steps.
:param num_samples: the number of samples to draw.
:param alpha: a `np.ndarray` of shape `(n_classes,)` with the alpha parameters of the
Dirichlet prior
:param seed: the random seed.
:return: a `dict` with the samples. The keys are the names of the latent variables.
"""
mcmc = numpyro.infer.MCMC(
numpyro.infer.NUTS(model_bayesianCC),
num_warmup=num_warmup,
@ -85,7 +88,7 @@ def sample_posterior(
progress_bar=False
)
rng_key = jax.random.PRNGKey(seed)
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled)
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha)
return mcmc.get_samples()

View File

@ -1,3 +1,6 @@
from numbers import Number
from typing import Iterable
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator
@ -726,6 +729,8 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
:param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
:param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
for a uniform, uninformative prior (default)
"""
def __init__(self,
classifier: BaseEstimator=None,
@ -735,12 +740,16 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
num_samples: int = 1_000,
mcmc_seed: int = 0,
confidence_level: float = 0.95,
region: str = 'intervals'):
region: str = 'intervals',
prior = 'uniform'):
if num_warmup <= 0:
raise ValueError(f'parameter {num_warmup=} must be a positive integer')
if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer')
assert ((isinstance(prior, str) and prior == 'uniform') or
(isinstance(prior, Iterable) and all(isinstance(v, Number) for v in prior))), \
f'wrong type for {prior=}; expected "uniform" or an array-like of real values'
if _bayesian.DEPENDENCIES_INSTALLED is False:
raise ImportError("Auxiliary dependencies are required. "
@ -752,6 +761,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
self.mcmc_seed = mcmc_seed
self.confidence_level = confidence_level
self.region = region
self.prior = prior
# Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
# labeled as class y and predicted as class c.
@ -782,11 +792,18 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float)
self._samples = _bayesian.sample_posterior(
n_classes = len(self.classifier.classes_)
if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = np.asarray([1.] * n_classes)
else:
alpha = np.asarray(self.prior)
self._samples = _bayesian.sample_posterior_bayesianCC(
n_c_unlabeled=n_c_unlabeled,
n_y_and_c_labeled=self._n_and_c_labeled,
num_warmup=self.num_warmup,
num_samples=self.num_samples,
alpha=alpha,
seed=self.mcmc_seed,
)
return self._samples