mapls working
This commit is contained in:
parent
a511f577c9
commit
9ae65ab09a
|
|
@ -1,10 +1,7 @@
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
from numpy.ma.core import shape
|
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import quapy.util
|
from BayesianKDEy.commons import ILRtransformation, in_simplex
|
||||||
from quapy.method._kdey import KDEBase
|
from quapy.method._kdey import KDEBase
|
||||||
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
|
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
|
||||||
from quapy.functional import CLRtransformation
|
from quapy.functional import CLRtransformation
|
||||||
|
|
@ -95,7 +92,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
|
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def aggregate(self, classif_predictions):
|
def aggregate(self, classif_predictions: np.ndarray):
|
||||||
if self.engine == 'rw-mh':
|
if self.engine == 'rw-mh':
|
||||||
if self.prior != 'uniform':
|
if self.prior != 'uniform':
|
||||||
raise RuntimeError('prior is not yet implemented in rw-mh')
|
raise RuntimeError('prior is not yet implemented in rw-mh')
|
||||||
|
|
@ -105,6 +102,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
raise RuntimeError('prior is not yet implemented in emcee')
|
raise RuntimeError('prior is not yet implemented in emcee')
|
||||||
self.prevalence_samples = self._bayesian_emcee(classif_predictions)
|
self.prevalence_samples = self._bayesian_emcee(classif_predictions)
|
||||||
elif self.engine == 'numpyro':
|
elif self.engine == 'numpyro':
|
||||||
|
self.ilr = ILRtransformation(jax_mode=True)
|
||||||
self.prevalence_samples = self._bayesian_numpyro(classif_predictions)
|
self.prevalence_samples = self._bayesian_numpyro(classif_predictions)
|
||||||
return self.prevalence_samples.mean(axis=0)
|
return self.prevalence_samples.mean(axis=0)
|
||||||
|
|
||||||
|
|
@ -247,8 +245,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
[self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
|
[self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
|
||||||
)
|
)
|
||||||
|
|
||||||
# move to jax
|
|
||||||
test_densities = jnp.array(test_densities)
|
|
||||||
n_classes = X_probs.shape[-1]
|
n_classes = X_probs.shape[-1]
|
||||||
if isinstance(self.prior, str) and self.prior == 'uniform':
|
if isinstance(self.prior, str) and self.prior == 'uniform':
|
||||||
alpha = [1.] * n_classes
|
alpha = [1.] * n_classes
|
||||||
|
|
@ -270,8 +266,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
samples_z = mcmc.get_samples()["z"]
|
samples_z = mcmc.get_samples()["z"]
|
||||||
|
|
||||||
# back to simplex
|
# back to simplex
|
||||||
ilr = ILRtransformation(jax_mode=True)
|
samples_prev = np.asarray(self.ilr.inverse(np.asarray(samples_z)))
|
||||||
samples_prev = np.asarray(ilr.inverse(np.asarray(samples_z)))
|
|
||||||
|
|
||||||
return samples_prev
|
return samples_prev
|
||||||
|
|
||||||
|
|
@ -280,7 +275,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
test_densities: shape (n_classes, n_instances,)
|
test_densities: shape (n_classes, n_instances,)
|
||||||
"""
|
"""
|
||||||
n_classes = test_densities.shape[0]
|
n_classes = test_densities.shape[0]
|
||||||
ilr = ILRtransformation(jax_mode=True)
|
|
||||||
|
|
||||||
# sample in unconstrained R^(n_classes-1)
|
# sample in unconstrained R^(n_classes-1)
|
||||||
z = numpyro.sample(
|
z = numpyro.sample(
|
||||||
|
|
@ -288,7 +282,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
dist.Normal(0.0, 1.0).expand([n_classes - 1])
|
dist.Normal(0.0, 1.0).expand([n_classes - 1])
|
||||||
)
|
)
|
||||||
|
|
||||||
prev = ilr.inverse(z) # simplex, shape (n_classes,)
|
prev = self.ilr.inverse(z) # simplex, shape (n_classes,)
|
||||||
|
|
||||||
# prior
|
# prior
|
||||||
if alpha is not None:
|
if alpha is not None:
|
||||||
|
|
@ -299,66 +293,10 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
# if alpha is None, then this corresponds to a weak logistic-normal prior
|
# if alpha is None, then this corresponds to a weak logistic-normal prior
|
||||||
|
|
||||||
# likelihood
|
# likelihood
|
||||||
|
test_densities = jnp.array(test_densities)
|
||||||
likelihoods = jnp.dot(prev, test_densities)
|
likelihoods = jnp.dot(prev, test_densities)
|
||||||
numpyro.factor(
|
numpyro.factor(
|
||||||
"loglik", (1.0 / self.temperature) * jnp.sum(jnp.log(likelihoods + 1e-10))
|
"loglik", (1.0 / self.temperature) * jnp.sum(jnp.log(likelihoods + 1e-10))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def in_simplex(x):
|
|
||||||
return np.all(x >= 0) and np.isclose(x.sum(), 1)
|
|
||||||
|
|
||||||
|
|
||||||
class ILRtransformation(F.CompositionalTransformation):
|
|
||||||
def __init__(self, jax_mode=False):
|
|
||||||
self.jax_mode = jax_mode
|
|
||||||
|
|
||||||
def array(self, X):
|
|
||||||
if self.jax_mode:
|
|
||||||
return jnp.array(X)
|
|
||||||
else:
|
|
||||||
return np.asarray(X)
|
|
||||||
|
|
||||||
def __call__(self, X):
|
|
||||||
X = self.array(X)
|
|
||||||
X = quapy.error.smooth(X, self.EPSILON)
|
|
||||||
k = X.shape[-1]
|
|
||||||
V = self.array(self.get_V(k))
|
|
||||||
logp = jnp.log(X) if self.jax_mode else np.log(X)
|
|
||||||
return logp @ V.T
|
|
||||||
|
|
||||||
def inverse(self, Z):
|
|
||||||
Z = self.array(Z)
|
|
||||||
k_minus_1 = Z.shape[-1]
|
|
||||||
k = k_minus_1 + 1
|
|
||||||
V = self.array(self.get_V(k))
|
|
||||||
logp = Z @ V
|
|
||||||
p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
|
|
||||||
p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
|
|
||||||
return p
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
|
||||||
def get_V(self, k):
|
|
||||||
def helmert_matrix(k):
|
|
||||||
"""
|
|
||||||
Returns the (k x k) Helmert matrix.
|
|
||||||
"""
|
|
||||||
H = np.zeros((k, k))
|
|
||||||
for i in range(1, k):
|
|
||||||
H[i, :i] = 1
|
|
||||||
H[i, i] = -(i)
|
|
||||||
H[i] = H[i] / np.sqrt(i * (i + 1))
|
|
||||||
# row 0 stays zeros; will be discarded
|
|
||||||
return H
|
|
||||||
|
|
||||||
def ilr_basis(k):
|
|
||||||
"""
|
|
||||||
Constructs an orthonormal ILR basis using the Helmert submatrix.
|
|
||||||
Output shape: (k-1, k)
|
|
||||||
"""
|
|
||||||
H = helmert_matrix(k)
|
|
||||||
V = H[1:, :] # remove first row of zeros
|
|
||||||
return V
|
|
||||||
|
|
||||||
return ilr_basis(k)
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,327 @@
|
||||||
|
import jax.numpy as jnp
|
||||||
|
import numpy as np
|
||||||
|
import numpyro
|
||||||
|
import numpyro.distributions as dist
|
||||||
|
from numpyro.infer import MCMC, NUTS, HMC
|
||||||
|
import jax.random as random
|
||||||
|
from sklearn.base import BaseEstimator
|
||||||
|
from jax.scipy.special import logsumexp
|
||||||
|
from BayesianKDEy.commons import ILRtransformation
|
||||||
|
from quapy.method.aggregative import AggregativeSoftQuantifier
|
||||||
|
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
|
||||||
|
import quapy.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
    """
    Bayesian quantifier that samples class prevalences with NUTS (numpyro) in ILR coordinates.
    The chain is initialised at the point estimate returned by :func:`mapls`, and the likelihood
    re-weights the classifier posteriors by the ratio between candidate and training prevalence.

    :param classifier: forwarded to the parent constructor
    :param fit_classifier: forwarded to the parent constructor
    :param val_split: forwarded to the parent constructor
    :param exact_train_prev: set to True (default) for using the true training prevalence as the initial
        observation; set to False for computing the training prevalence as an estimate of it, i.e., as the
        expected value of the posterior probabilities of the training instances.
    :param num_warmup: number of warm-up steps passed to the numpyro MCMC sampler
    :param num_samples: number of posterior samples passed to the numpyro MCMC sampler; must be positive
    :param mcmc_seed: seed used to build the PRNG key of the MCMC run
    :param confidence_level: default confidence level used by :meth:`predict_conf`
    :param region: name of the confidence-region method handed to `WithConfidenceABC.construct_region`
    :param temperature: the log-likelihood is multiplied by `1/temperature`
    :param prior: 'uniform' (Dirichlet with all concentrations set to 1), 'map' (symmetric Dirichlet whose
        concentration is derived from the lambda estimated by :func:`mapls`), 'map2' (same, but lambda is
        re-estimated using the MAPLS solution as the prior), or an explicit array-like of Dirichlet
        concentrations (one per class)
    :param verbose: whether to display the MCMC progress bar
    """

    def __init__(self,
                 classifier: BaseEstimator = None,
                 fit_classifier=True,
                 val_split: int = 5,
                 exact_train_prev=True,
                 num_warmup: int = 500,
                 num_samples: int = 1_000,
                 mcmc_seed: int = 0,
                 confidence_level: float = 0.95,
                 region: str = 'intervals',
                 temperature=1.,
                 prior='uniform',
                 verbose=False
                 ):

        if num_samples <= 0:
            raise ValueError(f'parameter {num_samples=} must be a positive integer')
        super().__init__(classifier, fit_classifier, val_split)
        self.exact_train_prev = exact_train_prev
        self.num_warmup = num_warmup
        self.num_samples = num_samples
        self.mcmc_seed = mcmc_seed
        self.confidence_level = confidence_level
        self.region = region
        self.temperature = temperature
        self.prior = prior
        self.verbose = verbose

    def aggregation_fit(self, classif_predictions, labels):
        """
        Stores the training posteriors and the training prevalence (exact, from the labels, or estimated
        from the posteriors, depending on `exact_train_prev`), and creates the ILR transformation.

        :param classif_predictions: posterior probabilities of the training instances
        :param labels: true labels of the training instances
        :return: self
        """
        self.train_post = classif_predictions
        if self.exact_train_prev:
            self.train_prevalence = F.prevalence_from_labels(labels, classes=self.classes_)
        else:
            self.train_prevalence = F.prevalence_from_probabilities(classif_predictions)
        self.ilr = ILRtransformation(jax_mode=True)
        return self

    def aggregate(self, classif_predictions: np.ndarray):
        """
        Runs the sampler on the test posteriors and returns the mean of the sampled prevalences.
        The samples are kept in `self.prevalence_samples` (as a numpy array).

        :param classif_predictions: array of shape `(n_test, n_classes)` with the test posteriors
        :return: the posterior-mean prevalence vector
        :raises ValueError: if `prior` is a string other than 'uniform', 'map' or 'map2'
        """
        n_test, n_classes = classif_predictions.shape

        pi_star, lam = mapls(
            self.train_post,
            test_probs=classif_predictions,
            pz=self.train_prevalence,
            return_lambda=True
        )

        # pi_star: MAP in simplex (shape: [K]), convert to ILR space
        z0 = self.ilr(pi_star)

        # only compare against the keywords when the prior really is a string; comparing an
        # array-valued prior with a string is ambiguous (and raises in recent numpy versions)
        prior = self.prior
        if isinstance(prior, str):
            if prior == 'uniform':
                alpha = [1.] * n_classes
            elif prior == 'map':
                alpha_0 = alpha0_from_lamda(lam, n_test=n_test, n_classes=n_classes)
                alpha = [alpha_0] * n_classes
            elif prior == 'map2':
                lam2 = get_lamda(
                    test_probs=classif_predictions,
                    pz=self.train_prevalence,
                    q_prior=pi_star,
                    dvg=kl_div
                )
                alpha_0 = alpha0_from_lamda(lam2, n_test=n_test, n_classes=n_classes)
                alpha = [alpha_0] * n_classes
            else:
                raise ValueError(
                    f'unknown prior {prior!r}; valid names are "uniform", "map", "map2" '
                    f'(or pass the Dirichlet concentrations explicitly)'
                )
        else:
            # explicit Dirichlet concentrations provided by the user
            alpha = prior

        kernel = NUTS(self.model)
        mcmc = MCMC(
            kernel,
            num_warmup=self.num_warmup,
            num_samples=self.num_samples,
            num_chains=1,
            progress_bar=self.verbose
        )

        mcmc.run(
            random.PRNGKey(self.mcmc_seed),
            test_posteriors=classif_predictions,
            alpha=alpha,
            init_params={"z": z0}
        )

        samples = mcmc.get_samples()["z"]
        # back to the simplex; converted to numpy, as BayesianKDEy does with its numpyro samples
        self.prevalence_samples = np.asarray(self.ilr.inverse(samples))
        return self.prevalence_samples.mean(axis=0)

    def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
        """
        Returns the point estimate together with a confidence region built from the posterior samples.

        :param instances: the test instances to classify and quantify
        :param confidence_level: confidence level of the region; defaults to `self.confidence_level`
        :return: a tuple `(point_estimate, region)`
        """
        if confidence_level is None:
            confidence_level = self.confidence_level
        classif_predictions = self.classify(instances)
        point_estimate = self.aggregate(classif_predictions)
        samples = self.prevalence_samples  # available after calling "aggregate" function
        region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
        return point_estimate, region

    def log_likelihood(self, test_classif, test_prev, train_prev):
        """
        Sum, over the test instances, of `log sum_y posterior(y|x) * test_prev[y] / train_prev[y]`.
        The value is a sum (not an average) over instances.

        :param test_classif: test posteriors, classes along the last axis
        :param test_prev: candidate test prevalence
        :param train_prev: training prevalence
        :return: scalar log-likelihood
        """
        log_w = jnp.log(test_prev) - jnp.log(train_prev)
        return jnp.sum(
            logsumexp(jnp.log(test_classif) + log_w, axis=-1)
        )

    def model(self, test_posteriors, alpha):
        """
        numpyro model: a standard-normal sample site "z" in ILR space, mapped back to the simplex,
        plus two factors (Dirichlet log-density of the prevalence and tempered log-likelihood).

        :param test_posteriors: array of shape `(n_test, n_classes)` with the test posteriors
        :param alpha: Dirichlet concentrations, one per class
        """
        test_posteriors = jnp.array(test_posteriors)
        n_classes = test_posteriors.shape[1]

        # prior in ILR
        z = numpyro.sample(
            "z",
            dist.Normal(jnp.zeros(n_classes-1), 1.0)
        )

        # back to simplex
        prev = self.ilr.inverse(z)
        train_prev = jnp.array(self.train_prevalence)

        # prior
        alpha = jnp.array(alpha)
        numpyro.factor(
            "dirichlet_prior",
            dist.Dirichlet(alpha).log_prob(prev)
        )

        # Likelihood
        numpyro.factor(
            "likelihood",
            (1.0 / self.temperature) * self.log_likelihood(test_posteriors, test_prev=prev, train_prev=train_prev)
        )
|
||||||
|
|
||||||
|
|
||||||
|
# adapted from https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
|
||||||
|
|
||||||
|
def mapls(train_probs: np.ndarray,
          test_probs: np.ndarray,
          pz: np.ndarray,
          qy_mode: str = 'soft',
          max_iter: int = 100,
          init_mode: str = 'identical',
          lam: float = None,
          dvg_name='kl',
          return_lambda=False
          ):
    r"""
    Implementation of Maximum A Posteriori Label Shift,
    for Unknown target label distribution estimation

    Given source domain P(Y_s=i|X_s=x) = f(x) and P(Y_s=i),
    estimate target domain P(Y_t=i) on test set

    :param train_probs: source-domain posteriors; not used by the computation, kept in the
        signature for interface compatibility
    :param test_probs: target-domain posteriors, classes along the last axis
    :param pz: source label distribution (one value per class)
    :param qy_mode: 'soft' or 'hard', forwarded to :func:`mapls_EM`
    :param max_iter: number of EM iterations (also used when estimating lambda)
    :param init_mode: 'identical' or 'uniform', forwarded to :func:`mapls_EM`
    :param lam: interpolation weight; when None it is estimated with :func:`get_lamda`
    :param dvg_name: divergence used for estimating lambda, 'kl' or 'js'
    :param return_lambda: if True, returns the tuple `(qz, lam)` instead of `qz` alone
    :return: the estimated target label distribution, optionally with the lambda that was used
    :raises ValueError: on a class-count mismatch, an invalid `max_iter`, or an unknown `dvg_name`
    """
    # Sanity Check (explicit raises: asserts vanish under "python -O")
    cls_num = len(pz)
    if test_probs.shape[-1] != cls_num:
        raise ValueError(
            f'test_probs has {test_probs.shape[-1]} classes but pz has {cls_num}'
        )
    # accept numpy integer types as well as int; bool is still rejected, as before
    is_integer = isinstance(max_iter, (int, np.integer)) and not isinstance(max_iter, bool)
    if not is_integer or max_iter < 0:
        raise ValueError('max_iter should be a positive integer, not ' + str(max_iter))

    # Setup d(p,q) measure
    divergences = {'kl': kl_div, 'js': js_div}
    if dvg_name not in divergences:
        raise ValueError('Unsupported distribution distance measure, expect kl or js.')
    dvg = divergences[dvg_name]

    # Set Prior of Target Label Distribution (uniform)
    q_prior = np.ones(cls_num) / cls_num

    # Lambda estimation
    if lam is None:
        lam = get_lamda(test_probs, pz, q_prior, dvg=dvg, max_iter=max_iter)

    # EM Algorithm Computation
    qz = mapls_EM(test_probs, pz, lam, q_prior, cls_num,
                  init_mode=init_mode, max_iter=max_iter, qy_mode=qy_mode)

    if return_lambda:
        return qz, lam
    return qz
|
||||||
|
|
||||||
|
|
||||||
|
def mapls_EM(probs, pz, lam, q_prior, cls_num, init_mode='identical', max_iter=100, qy_mode='soft'):
    """
    EM iterations with a MAP-style update of the target label distribution.

    At each iteration the posteriors are re-weighted by `qz / pz` and renormalised per instance
    (E-step); a new label distribution is then obtained from them (M-step) and interpolated with
    the prior as `lam * qz_new + (1 - lam) * q_prior`.

    :param probs: array of shape `(n_instances, cls_num)` with the posteriors
    :param pz: source label distribution; it is normalised to sum 1
    :param lam: interpolation weight given to the data-driven estimate
    :param q_prior: prior target label distribution (or a scalar such as 0 when `lam` is 1)
    :param cls_num: number of classes
    :param init_mode: 'identical' starts from `pz`; 'uniform' starts from the uniform distribution
    :param max_iter: number of iterations; with 0 the initial distribution is returned
    :param qy_mode: 'soft' averages the re-weighted posteriors; 'hard' uses the relative frequencies
        of the arg-max predictions
    :return: the estimated target label distribution
    :raises ValueError: if `init_mode` or `qy_mode` is not one of the supported values
    """
    # validate once, up front, so that a wrong mode is reported even when max_iter is 0
    if qy_mode not in ('soft', 'hard'):
        raise ValueError('mapls mode should be either "soft" or "hard". ')

    # Normalize Source Label Distribution pz
    pz = np.array(pz) / np.sum(pz)

    # Initialize Target Label Distribution qz
    if init_mode == 'uniform':
        qz = np.ones(cls_num) / cls_num
    elif init_mode == 'identical':
        qz = pz.copy()
    else:
        raise ValueError('init_mode should be either "uniform" or "identical"')

    # Initialize w
    w = qz / pz

    # EM algorithm with MAP estimation
    for _ in range(max_iter):
        # E-Step: re-weight and L1-normalise each row (all-zero rows are left untouched)
        weighted = probs * w
        row_mass = np.abs(weighted).sum(axis=-1, keepdims=True)
        row_mass[row_mass == 0] = 1
        mapls_probs = weighted / row_mass

        # M-Step
        if qy_mode == 'hard':
            pred = np.argmax(mapls_probs, axis=-1).reshape(-1)
            # relative frequencies, not raw counts: the estimate is mixed with a probability
            # vector below, and raw counts would make the (1 - lam) prior term negligible
            qz_new = np.bincount(pred, minlength=cls_num) / pred.size
        else:
            qz_new = np.mean(mapls_probs, axis=0)

        # Update w with MAP estimation of Target Label Distribution qz
        qz = lam * qz_new + (1 - lam) * q_prior
        qz = qz / qz.sum()
        w = qz / pz

    return qz
|
||||||
|
|
||||||
|
|
||||||
|
def get_lamda(test_probs, pz, q_prior, dvg, max_iter=50):
    """
    Estimates the interpolation weight lambda used by :func:`mapls_EM`.

    A label distribution is first estimated with `lam=1` (i.e., without any prior), and its
    divergences from `q_prior` and from `pz`, together with the divergence of `pz` from `q_prior`,
    are mapped to confidence values through :func:`lam_forward` / :func:`lam_inv`. The result is
    the fixed 0.9 / 0.1 weighted combination of two of those confidences.

    :param test_probs: test posteriors, classes along the last axis
    :param pz: source label distribution
    :param q_prior: prior target label distribution
    :param dvg: callable `dvg(p, q)` returning the divergence between two distributions
    :param max_iter: number of EM iterations for the prior-free estimate
    :return: the estimated lambda
    """
    n_classes = len(pz)

    # MLLS estimation of the target label distribution (lam=1, so the prior term vanishes)
    qz_mlls = mapls_EM(test_probs, pz, 1, 0, n_classes, max_iter=max_iter)

    div_target_prior = dvg(qz_mlls, q_prior)
    div_target_source = dvg(qz_mlls, pz)
    div_source_prior = dvg(pz, q_prior)

    source_prior_conf = 1 - lam_forward(div_source_prior, lam_inv(dpq=0.5, lam=0.2))
    # both remaining confidences share the same gamma
    gamma = lam_inv(dpq=0.5, lam=source_prior_conf)
    target_prior_conf = lam_forward(div_target_prior, gamma)
    target_source_conf = lam_forward(div_target_source, gamma)

    confidences = np.array([target_prior_conf, 1 - target_source_conf])
    mixing = np.array([0.9, 0.1])
    return np.sum(mixing * confidences)
|
||||||
|
|
||||||
|
|
||||||
|
def lam_inv(dpq, lam):
    """
    Returns the gamma for which `lam_forward(dpq, gamma)` equals `lam`,
    i.e., `(1 / (1 - lam) - 1) / dpq`.

    :param dpq: divergence value
    :param lam: target value of :func:`lam_forward`
    :return: the corresponding gamma
    """
    inverse_complement = 1 / (1 - lam)
    return (inverse_complement - 1) / dpq
|
||||||
|
|
||||||
|
|
||||||
|
def lam_forward(dpq, gamma):
    """
    Maps a divergence value to `gamma * dpq / (1 + gamma * dpq)`.

    :param dpq: divergence value
    :param gamma: scale applied to the divergence
    :return: the squashed value
    """
    scaled = gamma * dpq
    return scaled / (1 + scaled)
|
||||||
|
|
||||||
|
|
||||||
|
# def kl_div(p, q):
|
||||||
|
# p = np.asarray(p, dtype=np.float32)
|
||||||
|
# q = np.asarray(q + 1e-8, dtype=np.float32)
|
||||||
|
#
|
||||||
|
# return np.sum(np.where(p != 0, p * np.log(p / q), 0))
|
||||||
|
|
||||||
|
|
||||||
|
def kl_div(p, q, eps=1e-12):
    """
    Kullback-Leibler divergence `sum_i p_i * log(p_i / (q_i + eps))`, computed only over the
    entries in which `p` is strictly positive.

    :param p: first distribution (array-like)
    :param q: second distribution (array-like)
    :param eps: constant added to the entries of `q` in the denominator
    :return: the divergence value
    """
    p_arr = np.asarray(p, dtype=float)
    q_arr = np.asarray(q, dtype=float)

    support = p_arr > 0
    p_pos = p_arr[support]
    ratio = p_pos / (q_arr[support] + eps)
    return np.sum(p_pos * np.log(ratio))
|
||||||
|
|
||||||
|
|
||||||
|
def js_div(p, q):
    """
    Jensen-Shannon divergence: the average of the KL divergences of `p` and `q` from their
    midpoint `(p + q) / 2`. Both inputs are asserted to sum to 1 (tolerance 1e-6).

    :param p: first distribution (array supporting element-wise addition)
    :param q: second distribution (array supporting element-wise addition)
    :return: the divergence value
    """
    p_sums_to_one = np.abs(np.sum(p) - 1) < 1e-6
    assert p_sums_to_one and (np.abs(np.sum(q) - 1) < 1e-6)
    midpoint = (p + q) / 2
    half_kl_p = kl_div(p, midpoint) / 2
    half_kl_q = kl_div(q, midpoint) / 2
    return half_kl_p + half_kl_q
|
||||||
|
|
||||||
|
|
||||||
|
def normalized(a, axis=-1, order=2):
    r"""
    Prediction Normalization

    Divides `a` by its norm of the given order computed along `axis`; norms equal to 0 are
    replaced by 1, so that the corresponding entries are returned unchanged.

    :param a: array to normalise
    :param axis: axis along which the norm is computed
    :param order: order of the norm (as in `np.linalg.norm`)
    :return: the normalised array
    """
    norms = np.linalg.norm(a, ord=order, axis=axis)
    norms = np.atleast_1d(norms)
    norms[norms == 0] = 1
    return a / np.expand_dims(norms, axis)
|
||||||
|
|
||||||
|
|
||||||
|
def alpha0_from_lamda(lam, n_test, n_classes):
    """
    Returns `1 + n_test * (1 - lam) / (lam * n_classes)`.

    NOTE(review): this is the concentration of a symmetric Dirichlet whose MAP update
    `(n_test * q + alpha0 - 1) / (n_test + n_classes * (alpha0 - 1))` gives weight `lam` to the
    data term; confirm this is the intended correspondence with the interpolation in mapls_EM.

    :param lam: interpolation weight (must be non-zero)
    :param n_test: number of test instances
    :param n_classes: number of classes
    :return: the concentration value
    """
    pseudo_count = n_test * (1 - lam) / (lam * n_classes)
    return 1 + pseudo_count
|
||||||
|
|
@ -1,8 +1,13 @@
|
||||||
import os
|
import os
|
||||||
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from jax import numpy as jnp
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
|
|
||||||
|
import error
|
||||||
|
import functional as F
|
||||||
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
@ -80,3 +85,61 @@ class KDEyILR(KDEyML):
|
||||||
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
|
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
|
||||||
random_state=random_state, kernel='ilr'
|
random_state=random_state, kernel='ilr'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ILRtransformation(F.CompositionalTransformation):
    """
    Isometric log-ratio (ILR) transformation based on a Helmert basis.

    Maps points with `k` components to `k-1` coordinates (:meth:`__call__`) and back
    (:meth:`inverse`). When `jax_mode` is True the computations use `jax.numpy`, otherwise `numpy`.

    :param jax_mode: whether to operate on jax arrays (default False)
    """

    def __init__(self, jax_mode=False):
        self.jax_mode = jax_mode

    def array(self, X):
        """Converts `X` to a jax array (jax mode) or to a numpy array."""
        if self.jax_mode:
            return jnp.array(X)
        else:
            return np.asarray(X)

    def __call__(self, X):
        """
        Transforms `X` (components along the last axis) into ILR coordinates.

        :param X: array-like of shape `(..., k)`
        :return: array of shape `(..., k-1)`
        """
        X = self.array(X)
        # smoothing with the class-level EPSILON (presumably to keep the logarithm finite)
        X = qp.error.smooth(X, self.EPSILON)
        k = X.shape[-1]
        V = self.array(self.get_V(k))
        logp = jnp.log(X) if self.jax_mode else np.log(X)
        return logp @ V.T

    def inverse(self, Z):
        """
        Maps ILR coordinates back to vectors that are non-negative and sum to 1 along the last axis.

        :param Z: array-like of shape `(..., k-1)`
        :return: array of shape `(..., k)`
        """
        xp = jnp if self.jax_mode else np
        Z = self.array(Z)
        k_minus_1 = Z.shape[-1]
        k = k_minus_1 + 1
        V = self.array(self.get_V(k))
        logp = Z @ V
        # subtracting the maximum leaves the normalised result unchanged (the common factor
        # cancels) but prevents exp() from overflowing for large coordinates
        logp = logp - xp.max(logp, axis=-1, keepdims=True)
        p = xp.exp(logp)
        return p / xp.sum(p, axis=-1, keepdims=True)

    def get_V(self, k):
        """
        Returns the `(k-1, k)` ILR basis. The array is cached and shared across calls and
        instances, so it must not be modified in place.

        :param k: number of components
        :return: numpy array of shape `(k-1, k)`
        """
        return self._ilr_basis(k)

    @staticmethod
    @lru_cache(maxsize=None)
    def _ilr_basis(k):
        """
        Constructs an orthonormal ILR basis using the Helmert submatrix.
        Output shape: (k-1, k)

        Cached on `k` only: caching the bound method instead would key on `self` and keep
        every instance alive for as long as the cache exists.
        """
        H = np.zeros((k, k))
        for i in range(1, k):
            H[i, :i] = 1
            H[i, i] = -(i)
            H[i] = H[i] / np.sqrt(i * (i + 1))
        # row 0 stays zeros and is discarded
        return H[1:, :]
|
||||||
|
|
||||||
|
|
||||||
|
def in_simplex(x):
    """
    Checks that all the entries of `x` are non-negative and that their total sum is close to 1.

    :param x: numpy array
    :return: a truthy value if both conditions hold, a falsy one otherwise
    """
    non_negative = np.all(x >= 0)
    return non_negative and np.isclose(x.sum(), 1)
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,11 @@ from sklearn.linear_model import LogisticRegression as LR
|
||||||
from copy import deepcopy as cp
|
from copy import deepcopy as cp
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from BayesianKDEy._bayesian_mapls import BayesianMAPLS
|
||||||
from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR
|
from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR
|
||||||
from BayesianKDEy.temperature_calibration import temp_calibration
|
from BayesianKDEy.temperature_calibration import temp_calibration
|
||||||
from build.lib.quapy.data import LabelledCollection
|
from build.lib.quapy.data import LabelledCollection
|
||||||
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
|
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
|
||||||
from quapy.model_selection import GridSearchQ
|
from quapy.model_selection import GridSearchQ
|
||||||
from quapy.data import Dataset
|
from quapy.data import Dataset
|
||||||
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
|
||||||
|
|
@ -65,6 +66,20 @@ def methods():
|
||||||
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
||||||
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
|
# yield 'BayEMQ', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*2T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=None, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*2T01', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=0.1, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*2T10000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=10000, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*2T100000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=100000,
|
||||||
|
# exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
|
||||||
|
yield 'BayEMQ-U-Temp*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*Temp1', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*Temp10', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=10, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*Temp100', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=100, exact_train_prev=True), multiclass_method
|
||||||
|
# yield 'BayEMQ*Temp1000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1000, exact_train_prev=True), multiclass_method
|
||||||
|
|
||||||
|
|
||||||
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||||
|
|
@ -101,6 +116,7 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
|
||||||
t_init = time()
|
t_init = time()
|
||||||
withconf_quantifier = withconf_constructor(best_hyperparams)
|
withconf_quantifier = withconf_constructor(best_hyperparams)
|
||||||
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
|
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
|
||||||
|
print('calibrating temperature')
|
||||||
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
||||||
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
|
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
|
||||||
withconf_quantifier.temperature = temperature
|
withconf_quantifier.temperature = temperature
|
||||||
|
|
@ -111,7 +127,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
|
||||||
train_prevalence = training.prevalence()
|
train_prevalence = training.prevalence()
|
||||||
results = defaultdict(list)
|
results = defaultdict(list)
|
||||||
test_generator = UPP(test, repeats=100, random_state=0)
|
test_generator = UPP(test, repeats=100, random_state=0)
|
||||||
for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
|
pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
|
||||||
|
for i, (sample_X, true_prevalence) in pbar:
|
||||||
t_init = time()
|
t_init = time()
|
||||||
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
|
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
|
||||||
ttime = time()-t_init
|
ttime = time()-t_init
|
||||||
|
|
@ -125,6 +142,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
|
||||||
results['test-time'].append(ttime)
|
results['test-time'].append(ttime)
|
||||||
results['samples'].append(region.samples)
|
results['samples'].append(region.samples)
|
||||||
|
|
||||||
|
pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
|
||||||
|
|
||||||
report = {
|
report = {
|
||||||
'optim_hyper': best_hyperparams,
|
'optim_hyper': best_hyperparams,
|
||||||
'train_time': tr_time,
|
'train_time': tr_time,
|
||||||
|
|
@ -162,7 +181,7 @@ if __name__ == '__main__':
|
||||||
)
|
)
|
||||||
print(f'dataset={data_name}, '
|
print(f'dataset={data_name}, '
|
||||||
f'method={method_name}: '
|
f'method={method_name}: '
|
||||||
f'mae={report["results"]["ae"].mean():.3f}, '
|
f'mae={report["results"]["ae"].mean():.5f}, '
|
||||||
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||||
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -90,10 +90,15 @@ methods = ['BayesianACC', #'BayesianKDEy',
|
||||||
# 'BaKDE-Ait-numpyro',
|
# 'BaKDE-Ait-numpyro',
|
||||||
# 'BaKDE-Ait-numpyro-T*',
|
# 'BaKDE-Ait-numpyro-T*',
|
||||||
'BaKDE-Ait-numpyro-T*-U',
|
'BaKDE-Ait-numpyro-T*-U',
|
||||||
'BootstrapACC',
|
'BayEMQ-U-Temp1-2',
|
||||||
'BootstrapHDy',
|
'BayEMQ-U-Temp*',
|
||||||
'BootstrapKDEy',
|
# 'BayEMQ*2Temp1',
|
||||||
'BootstrapEMQ'
|
# 'BayEMQ*2Temp*'
|
||||||
|
|
||||||
|
# 'BootstrapACC',
|
||||||
|
# 'BootstrapHDy',
|
||||||
|
# 'BootstrapKDEy',
|
||||||
|
# 'BootstrapEMQ'
|
||||||
]
|
]
|
||||||
|
|
||||||
def nicer(name:str):
|
def nicer(name:str):
|
||||||
|
|
@ -193,7 +198,7 @@ for setup in ['multiclass']:
|
||||||
pv = pd.pivot_table(
|
pv = pd.pivot_table(
|
||||||
df, index='dataset', columns='method', values=[
|
df, index='dataset', columns='method', values=[
|
||||||
# f'amperr-{region}',
|
# f'amperr-{region}',
|
||||||
f'a-{region}',
|
# f'a-{region}',
|
||||||
f'c-{region}',
|
f'c-{region}',
|
||||||
# f'w-{region}',
|
# f'w-{region}',
|
||||||
'ae',
|
'ae',
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from full_experiments import model_selection
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
|
|
||||||
def select_imbalanced_datasets(top_m=5):
|
def select_imbalanced_datasets(top_m=10):
|
||||||
datasets_prevs = []
|
datasets_prevs = []
|
||||||
# choose top-m imbalanced datasets
|
# choose top-m imbalanced datasets
|
||||||
for data_name in multiclass['datasets']:
|
for data_name in multiclass['datasets']:
|
||||||
|
|
@ -107,7 +107,7 @@ def experiment(dataset: Dataset,
|
||||||
run_test(test, alpha_test_informative, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
|
run_test(test, alpha_test_informative, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
|
||||||
|
|
||||||
# informative prior
|
# informative prior
|
||||||
alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.5) * concentration
|
alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.25) * concentration
|
||||||
prior_type = 'wrong'
|
prior_type = 'wrong'
|
||||||
run_test(test, alpha_test_wrong, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
|
run_test(test, alpha_test_wrong, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
|
||||||
|
|
||||||
|
|
@ -267,7 +267,7 @@ def coverage_vs_amplitude_plot(df, save_path=None):
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
result_dir = Path('./results/prior_effect')
|
result_dir = Path('./results/prior_effect')
|
||||||
selected = select_imbalanced_datasets()
|
selected = select_imbalanced_datasets(10)
|
||||||
print(f'selected datasets={selected}')
|
print(f'selected datasets={selected}')
|
||||||
qp.environ['SAMPLE_SIZE'] = multiclass['sample_size']
|
qp.environ['SAMPLE_SIZE'] = multiclass['sample_size']
|
||||||
reports = []
|
reports = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue