bayesianCC with custom prior

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-13 13:05:42 +01:00
parent 724e1b13a0
commit 47dc6acc75
4 changed files with 37 additions and 15 deletions

View File

@ -42,6 +42,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
:param region: string, set to `intervals` for constructing confidence intervals (default), or to :param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for `ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space. constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
:param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
for a uniform, uninformative prior (default)
:param verbose: bool, whether to display progress bar :param verbose: bool, whether to display progress bar
""" """
def __init__(self, def __init__(self,
@ -249,7 +251,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
test_densities = jnp.array(test_densities) test_densities = jnp.array(test_densities)
n_classes = X_probs.shape[-1] n_classes = X_probs.shape[-1]
if isinstance(self.prior, str) and self.prior == 'uniform': if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = [1.]*n_classes alpha = [1.] * n_classes
else: else:
alpha = self.prior alpha = self.prior

View File

@ -32,8 +32,8 @@ def methods():
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]} kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
#yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0) yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0)
yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', # yield f'BaKDE-Ait', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison',
mcmc_seed=0, mcmc_seed=0,
engine='numpyro', engine='numpyro',
temperature=None, temperature=None,

View File

@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)"
P_C_COND_Y: str = "P(C|Y)" P_C_COND_Y: str = "P(C|Y)"
def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -> None: def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None:
""" """
Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_. Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_.
@ -47,7 +47,7 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -
K = len(n_c_unlabeled) K = len(n_c_unlabeled)
L = len(n_y_labeled) L = len(n_y_labeled)
pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.ones(L))) pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32)))
p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K))) p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K)))
with numpyro.plate('plate', L): with numpyro.plate('plate', L):
@ -57,13 +57,13 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray) -
numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled) numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
def sample_posterior_bayesianCC(
def sample_posterior( n_c_unlabeled: np.ndarray,
n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray,
n_y_and_c_labeled: np.ndarray, num_warmup: int,
num_warmup: int, num_samples: int,
num_samples: int, alpha: np.ndarray,
seed: int = 0, seed: int = 0,
) -> dict: ) -> dict:
""" """
Samples from the Bayesian quantification model in NumPyro using the Samples from the Bayesian quantification model in NumPyro using the
@ -75,9 +75,12 @@ def sample_posterior(
with entry `(y, c)` being the number of instances labeled as class `y` and predicted as class `c`. with entry `(y, c)` being the number of instances labeled as class `y` and predicted as class `c`.
:param num_warmup: the number of warmup steps. :param num_warmup: the number of warmup steps.
:param num_samples: the number of samples to draw. :param num_samples: the number of samples to draw.
:param alpha: a `np.ndarray` of shape `(n_classes,)` with the alpha parameters of the
Dirichlet prior
:param seed: the random seed. :param seed: the random seed.
:return: a `dict` with the samples. The keys are the names of the latent variables. :return: a `dict` with the samples. The keys are the names of the latent variables.
""" """
mcmc = numpyro.infer.MCMC( mcmc = numpyro.infer.MCMC(
numpyro.infer.NUTS(model_bayesianCC), numpyro.infer.NUTS(model_bayesianCC),
num_warmup=num_warmup, num_warmup=num_warmup,
@ -85,7 +88,7 @@ def sample_posterior(
progress_bar=False progress_bar=False
) )
rng_key = jax.random.PRNGKey(seed) rng_key = jax.random.PRNGKey(seed)
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled) mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha)
return mcmc.get_samples() return mcmc.get_samples()

View File

@ -1,3 +1,6 @@
from numbers import Number
from typing import Iterable
import numpy as np import numpy as np
from joblib import Parallel, delayed from joblib import Parallel, delayed
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
@ -726,6 +729,8 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
:param region: string, set to `intervals` for constructing confidence intervals (default), or to :param region: string, set to `intervals` for constructing confidence intervals (default), or to
`ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for `ellipse` for constructing an ellipse in the probability simplex, or to `ellipse-clr` for
constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space. constructing an ellipse in the Centered-Log Ratio (CLR) unconstrained space.
:param prior: an array-like with the alpha parameters of a Dirichlet prior, or the string 'uniform'
for a uniform, uninformative prior (default)
""" """
def __init__(self, def __init__(self,
classifier: BaseEstimator=None, classifier: BaseEstimator=None,
@ -735,12 +740,16 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
num_samples: int = 1_000, num_samples: int = 1_000,
mcmc_seed: int = 0, mcmc_seed: int = 0,
confidence_level: float = 0.95, confidence_level: float = 0.95,
region: str = 'intervals'): region: str = 'intervals',
prior = 'uniform'):
if num_warmup <= 0: if num_warmup <= 0:
raise ValueError(f'parameter {num_warmup=} must be a positive integer') raise ValueError(f'parameter {num_warmup=} must be a positive integer')
if num_samples <= 0: if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer') raise ValueError(f'parameter {num_samples=} must be a positive integer')
assert ((isinstance(prior, str) and prior == 'uniform') or
(isinstance(prior, Iterable) and all(isinstance(v, Number) for v in prior))), \
f'wrong type for {prior=}; expected "uniform" or an array-like of real values'
if _bayesian.DEPENDENCIES_INSTALLED is False: if _bayesian.DEPENDENCIES_INSTALLED is False:
raise ImportError("Auxiliary dependencies are required. " raise ImportError("Auxiliary dependencies are required. "
@ -752,6 +761,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
self.mcmc_seed = mcmc_seed self.mcmc_seed = mcmc_seed
self.confidence_level = confidence_level self.confidence_level = confidence_level
self.region = region self.region = region
self.prior = prior
# Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances # Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
# labeled as class y and predicted as class c. # labeled as class y and predicted as class c.
@ -782,11 +792,18 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float) n_c_unlabeled = F.counts_from_labels(classif_predictions, self.classifier.classes_).astype(float)
self._samples = _bayesian.sample_posterior( n_classes = len(self.classifier.classes_)
if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = np.asarray([1.] * n_classes)
else:
alpha = np.asarray(self.prior)
self._samples = _bayesian.sample_posterior_bayesianCC(
n_c_unlabeled=n_c_unlabeled, n_c_unlabeled=n_c_unlabeled,
n_y_and_c_labeled=self._n_and_c_labeled, n_y_and_c_labeled=self._n_and_c_labeled,
num_warmup=self.num_warmup, num_warmup=self.num_warmup,
num_samples=self.num_samples, num_samples=self.num_samples,
alpha=alpha,
seed=self.mcmc_seed, seed=self.mcmc_seed,
) )
return self._samples return self._samples