mapls working

Alejandro Moreo Fernandez 2026-01-15 09:41:37 +01:00
parent a511f577c9
commit 9ae65ab09a
6 changed files with 431 additions and 79 deletions

View File

@ -1,10 +1,7 @@
from functools import lru_cache
from numpy.ma.core import shape
from sklearn.base import BaseEstimator
import numpy as np
import quapy.util
from BayesianKDEy.commons import ILRtransformation, in_simplex
from quapy.method._kdey import KDEBase
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
from quapy.functional import CLRtransformation
@ -95,7 +92,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
return self
def aggregate(self, classif_predictions):
def aggregate(self, classif_predictions: np.ndarray):
if self.engine == 'rw-mh':
if self.prior != 'uniform':
raise RuntimeError('prior is not yet implemented in rw-mh')
@ -105,6 +102,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
raise RuntimeError('prior is not yet implemented in emcee')
self.prevalence_samples = self._bayesian_emcee(classif_predictions)
elif self.engine == 'numpyro':
self.ilr = ILRtransformation(jax_mode=True)
self.prevalence_samples = self._bayesian_numpyro(classif_predictions)
return self.prevalence_samples.mean(axis=0)
@ -247,8 +245,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
[self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
)
# move to jax
test_densities = jnp.array(test_densities)
n_classes = X_probs.shape[-1]
if isinstance(self.prior, str) and self.prior == 'uniform':
alpha = [1.] * n_classes
@ -270,8 +266,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
samples_z = mcmc.get_samples()["z"]
# back to simplex
ilr = ILRtransformation(jax_mode=True)
samples_prev = np.asarray(ilr.inverse(np.asarray(samples_z)))
samples_prev = np.asarray(self.ilr.inverse(np.asarray(samples_z)))
return samples_prev
@ -280,7 +275,6 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
test_densities: shape (n_classes, n_instances,)
"""
n_classes = test_densities.shape[0]
ilr = ILRtransformation(jax_mode=True)
# sample in unconstrained R^(n_classes-1)
z = numpyro.sample(
@ -288,7 +282,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
dist.Normal(0.0, 1.0).expand([n_classes - 1])
)
prev = ilr.inverse(z) # simplex, shape (n_classes,)
prev = self.ilr.inverse(z) # simplex, shape (n_classes,)
# prior
if alpha is not None:
@ -299,66 +293,10 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
# if alpha is None, then this corresponds to a weak logistic-normal prior
# likelihood
test_densities = jnp.array(test_densities)
likelihoods = jnp.dot(prev, test_densities)
numpyro.factor(
"loglik", (1.0 / self.temperature) * jnp.sum(jnp.log(likelihoods + 1e-10))
)
def in_simplex(x):
return np.all(x >= 0) and np.isclose(x.sum(), 1)
class ILRtransformation(F.CompositionalTransformation):
def __init__(self, jax_mode=False):
self.jax_mode = jax_mode
def array(self, X):
if self.jax_mode:
return jnp.array(X)
else:
return np.asarray(X)
def __call__(self, X):
X = self.array(X)
X = quapy.error.smooth(X, self.EPSILON)
k = X.shape[-1]
V = self.array(self.get_V(k))
logp = jnp.log(X) if self.jax_mode else np.log(X)
return logp @ V.T
def inverse(self, Z):
Z = self.array(Z)
k_minus_1 = Z.shape[-1]
k = k_minus_1 + 1
V = self.array(self.get_V(k))
logp = Z @ V
p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
return p
@lru_cache(maxsize=None)
def get_V(self, k):
def helmert_matrix(k):
"""
Returns the (k x k) Helmert matrix.
"""
H = np.zeros((k, k))
for i in range(1, k):
H[i, :i] = 1
H[i, i] = -(i)
H[i] = H[i] / np.sqrt(i * (i + 1))
# row 0 stays zeros; will be discarded
return H
def ilr_basis(k):
"""
Constructs an orthonormal ILR basis using the Helmert submatrix.
Output shape: (k-1, k)
"""
H = helmert_matrix(k)
V = H[1:, :] # remove first row of zeros
return V
return ilr_basis(k)
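For reference, the following is a minimal, self-contained sketch (assuming jax and numpyro are installed) of the sampling scheme used by _bayesian_numpyro above: a standard-normal prior over ILR coordinates, mapped back to the simplex, with the KDE mixture likelihood injected through numpyro.factor. The Helmert-based inverse is restated inline, and toy_densities is an illustrative placeholder rather than part of the module.

import numpy as np
import jax.numpy as jnp
import jax.random as random
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS

def ilr_basis(k):
    # (k-1, k) Helmert sub-matrix, same construction as get_V above
    H = np.zeros((k, k))
    for i in range(1, k):
        H[i, :i] = 1
        H[i, i] = -i
        H[i] /= np.sqrt(i * (i + 1))
    return jnp.array(H[1:])

def model(test_densities):
    n_classes = test_densities.shape[0]
    V = ilr_basis(n_classes)
    # unconstrained coordinates in R^(n_classes-1)
    z = numpyro.sample("z", dist.Normal(0.0, 1.0).expand([n_classes - 1]))
    p = jnp.exp(z @ V)                               # ILR inverse...
    prev = p / jnp.sum(p)                            # ...back to the simplex
    likelihoods = jnp.dot(prev, test_densities)      # mixture density of each test point
    numpyro.factor("loglik", jnp.sum(jnp.log(likelihoods + 1e-10)))

# toy_densities[i, j] = density of test point j under the class-i KDE (placeholder values)
toy_densities = jnp.array(np.random.default_rng(0).uniform(0.1, 2.0, size=(3, 50)))
mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000, num_chains=1)
mcmc.run(random.PRNGKey(0), test_densities=toy_densities)
samples_z = mcmc.get_samples()["z"]                  # shape (1000, 2); map back to the simplex with the ILR inverse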

View File

@ -0,0 +1,327 @@
import jax.numpy as jnp
import numpy as np
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS, HMC
import jax.random as random
from sklearn.base import BaseEstimator
from jax.scipy.special import logsumexp
from BayesianKDEy.commons import ILRtransformation
from quapy.method.aggregative import AggregativeSoftQuantifier
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC
import quapy.functional as F
class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
"""
:param classifier:
:param fit_classifier:
:param val_split:
:param exact_train_prev: set to True (default) for using the true training prevalence as the initial
observation; set to False for computing the training prevalence as an estimate of it, i.e., as the
expected value of the posterior probabilities of the training instances.
:param num_samples:
:param mcmc_seed:
:param confidence_level:
:param region:
"""
def __init__(self,
classifier: BaseEstimator = None,
fit_classifier=True,
val_split: int = 5,
exact_train_prev=True,
num_warmup: int = 500,
num_samples: int = 1_000,
mcmc_seed: int = 0,
confidence_level: float = 0.95,
region: str = 'intervals',
temperature=1.,
prior='uniform',
verbose=False
):
if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer')
super().__init__(classifier, fit_classifier, val_split)
self.exact_train_prev = exact_train_prev
self.num_warmup = num_warmup
self.num_samples = num_samples
self.mcmc_seed = mcmc_seed
self.confidence_level = confidence_level
self.region = region
self.temperature = temperature
self.prior = prior
self.verbose = verbose
def aggregation_fit(self, classif_predictions, labels):
self.train_post = classif_predictions
if self.exact_train_prev:
self.train_prevalence = F.prevalence_from_labels(labels, classes=self.classes_)
else:
self.train_prevalence = F.prevalence_from_probabilities(classif_predictions)
self.ilr = ILRtransformation(jax_mode=True)
return self
def aggregate(self, classif_predictions: np.ndarray):
n_test, n_classes = classif_predictions.shape
pi_star, lam = mapls(
self.train_post,
test_probs=classif_predictions,
pz=self.train_prevalence,
return_lambda=True
)
# pi_star: MAP in simplex (shape: [K]), convert to ILR space
z0 = self.ilr(pi_star)
if self.prior == 'uniform':
alpha = [1.] * n_classes
elif self.prior == 'map':
alpha_0 = alpha0_from_lamda(lam, n_test=n_test, n_classes=n_classes)
alpha = [alpha_0] * n_classes
elif self.prior == 'map2':
lam2 = get_lamda(
test_probs=classif_predictions,
pz=self.train_prevalence,
q_prior=pi_star,
dvg=kl_div
)
alpha_0 = alpha0_from_lamda(lam2, n_test=n_test, n_classes=n_classes)
alpha = [alpha_0] * n_classes
else:
alpha = self.prior
kernel = NUTS(self.model)
mcmc = MCMC(
kernel,
num_warmup=self.num_warmup,
num_samples=self.num_samples,
num_chains=1,
progress_bar=self.verbose
)
mcmc.run(
random.PRNGKey(self.mcmc_seed),
test_posteriors=classif_predictions,
alpha=alpha,
init_params={"z": z0}
)
samples = mcmc.get_samples()["z"]
self.prevalence_samples = self.ilr.inverse(samples)
return self.prevalence_samples.mean(axis=0)
def predict_conf(self, instances, confidence_level=None) -> (np.ndarray, ConfidenceRegionABC):
if confidence_level is None:
confidence_level = self.confidence_level
classif_predictions = self.classify(instances)
point_estimate = self.aggregate(classif_predictions)
samples = self.prevalence_samples # available after calling "aggregate" function
region = WithConfidenceABC.construct_region(samples, confidence_level=confidence_level, method=self.region)
return point_estimate, region
def log_likelihood(self, test_classif, test_prev, train_prev):
# marginal log-likelihood under prior probability shift: for each test instance x,
# p(x | test_prev) is proportional to sum_c p(c|x) * (test_prev_c / train_prev_c)
log_w = jnp.log(test_prev) - jnp.log(train_prev)
# normalized alternative:
# n_test = test_classif.shape[0]
# return (1/n_test) * jnp.sum(logsumexp(jnp.log(test_classif) + log_w, axis=-1))
return jnp.sum(
logsumexp(jnp.log(test_classif) + log_w, axis=-1)
)
def model(self, test_posteriors, alpha):
test_posteriors = jnp.array(test_posteriors)
n_classes = test_posteriors.shape[1]
# prior in ILR
z = numpyro.sample(
"z",
dist.Normal(jnp.zeros(n_classes-1), 1.0)
)
# back to simplex
prev = self.ilr.inverse(z)
train_prev = jnp.array(self.train_prevalence)
# prior
alpha = jnp.array(alpha)
numpyro.factor(
"dirichlet_prior",
dist.Dirichlet(alpha).log_prob(prev)
)
# Likelihood
numpyro.factor(
"likelihood",
(1.0 / self.temperature) * self.log_likelihood(test_posteriors, test_prev=prev, train_prev=train_prev)
)
# adapted from https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
def mapls(train_probs: np.ndarray,
test_probs: np.ndarray,
pz: np.ndarray,
qy_mode: str = 'soft',
max_iter: int = 100,
init_mode: str = 'identical',
lam: float = None,
dvg_name='kl',
return_lambda=False
):
r"""
Implementation of Maximum A Posteriori Label Shift,
for Unknown target label distribution estimation
Given source domain P(Y_s=i|X_s=x) = f(x) and P(Y_s=i),
estimate targe domain P(Y_t=i) on test set
"""
# Sanity Check
cls_num = len(pz)
assert test_probs.shape[-1] == cls_num
if not isinstance(max_iter, int) or max_iter <= 0:
raise ValueError('max_iter should be a positive integer, not ' + str(max_iter))
# Setup d(p,q) measure
if dvg_name == 'kl':
dvg = kl_div
elif dvg_name == 'js':
dvg = js_div
else:
raise Exception('Unsupported distribution distance measure, expect kl or js.')
# Set Prior of Target Label Distribution
q_prior = np.ones(cls_num) / cls_num
# q_prior = pz.copy()
# Lambda estimation-------------------------------------------------------#
if lam is None:
# logging.info('Data shape: %s, %s' % (str(train_probs.shape), str(test_probs.shape)))
# logging.info('Divergence type is %s' % (dvg))
lam = get_lamda(test_probs, pz, q_prior, dvg=dvg, max_iter=max_iter)
# logging.info('Estimated lambda value is %.4f' % lam)
# else:
# logging.info('Assigned lambda is %.4f' % lam)
# EM Algorithm Computation
qz = mapls_EM(test_probs, pz, lam, q_prior, cls_num,
init_mode=init_mode, max_iter=max_iter, qy_mode=qy_mode)
if return_lambda:
return qz, lam
else:
return qz
def mapls_EM(probs, pz, lam, q_prior, cls_num, init_mode='identical', max_iter=100, qy_mode='soft'):
# Normalize Source Label Distribution pz
pz = np.array(pz) / np.sum(pz)
# Initialize Target Label Distribution qz
if init_mode == 'uniform':
qz = np.ones(cls_num) / cls_num
elif init_mode == 'identical':
qz = pz.copy()
else:
raise ValueError('init_mode should be either "uniform" or "identical"')
# Initialize w
w = (np.array(qz) / np.array(pz))
# EM algorithm with MAP estimation----------------------------------------#
for i in range(max_iter):
# print('w shape ', w.shape)
# E-Step--------------------------------------------------------------#
mapls_probs = normalized(probs * w, axis=-1, order=1)
# M-Step--------------------------------------------------------------#
if qy_mode == 'hard':
pred = np.argmax(mapls_probs, axis=-1)
qz_new = np.bincount(pred.reshape(-1), minlength=cls_num)
elif qy_mode == 'soft':
qz_new = np.mean(mapls_probs, axis=0)
# elif qy_mode == 'topk':
# qz_new = Topk_qy(mapls_probs, cls_num, topk_ratio=0.9, head=0)
else:
raise Exception('mapls mode should be either "soft" or "hard". ')
# print(np.shape(pc_probs), np.shape(pred), np.shape(cls_num_list_t))
# Update w with MAP estimation of Target Label Distribution qz
# qz = (qz_new + alpha) / (N + np.sum(alpha))
qz = lam * qz_new + (1 - lam) * q_prior
qz /= qz.sum()
w = qz / pz
return qz
def get_lamda(test_probs, pz, q_prior, dvg, max_iter=50):
K = len(pz)
# MLLS estimation of source and target domain label distribution
qz_pred = mapls_EM(test_probs, pz, 1, 0, K, max_iter=max_iter)
TU_div = dvg(qz_pred, q_prior)
TS_div = dvg(qz_pred, pz)
SU_div = dvg(pz, q_prior)
# logging.info('weights are, TU_div %.4f, TS_div %.4f, SU_div %.4f' % (TU_div, TS_div, SU_div))
SU_conf = 1 - lam_forward(SU_div, lam_inv(dpq=0.5, lam=0.2))
TU_conf = lam_forward(TU_div, lam_inv(dpq=0.5, lam=SU_conf))
TS_conf = lam_forward(TS_div, lam_inv(dpq=0.5, lam=SU_conf))
# logging.info('weights are, uniform_weight %.4f, differ_weight %.4f, regularize weight %.4f'
# % (TU_conf, TS_conf, SU_conf))
confs = np.array([TU_conf, 1 - TS_conf])
w = np.array([0.9, 0.1])
lam = np.sum(w * confs)
# logging.info('Estimated lambda is: %.4f', lam)
return lam
def lam_inv(dpq, lam):
return (1 / (1 - lam) - 1) / dpq
def lam_forward(dpq, gamma):
return gamma * dpq / (1 + gamma * dpq)
# def kl_div(p, q):
# p = np.asarray(p, dtype=np.float32)
# q = np.asarray(q + 1e-8, dtype=np.float32)
#
# return np.sum(np.where(p != 0, p * np.log(p / q), 0))
def kl_div(p, q, eps=1e-12):
p = np.asarray(p, dtype=float)
q = np.asarray(q, dtype=float)
mask = p > 0
return np.sum(p[mask] * np.log(p[mask] / (q[mask] + eps)))
def js_div(p, q):
assert (np.abs(np.sum(p) - 1) < 1e-6) and (np.abs(np.sum(q) - 1) < 1e-6)
m = (p + q) / 2
return kl_div(p, m) / 2 + kl_div(q, m) / 2
def normalized(a, axis=-1, order=2):
r"""
Prediction Normalization
"""
l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
l2[l2 == 0] = 1
return a / np.expand_dims(l2, axis)
def alpha0_from_lamda(lam, n_test, n_classes):
# symmetric Dirichlet concentration alpha_0 whose MAP update (n_c + alpha_0 - 1) / (n_test + n_classes*(alpha_0 - 1))
# coincides with the MAPLS shrinkage lam*(n_c/n_test) + (1-lam)*(1/n_classes)
return 1 + n_test * (1 - lam) / (lam * n_classes)
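As a quick sanity check of the EM estimator above, the sketch below builds synthetic posteriors under label shift and recovers the target prevalence with mapls; the import path is an assumption that mirrors the one used in the experiment scripts, and all data are synthetic.

import numpy as np
from BayesianKDEy._bayesian_mapls import mapls, alpha0_from_lamda   # assumed module path

rng = np.random.default_rng(0)
n_classes, n_test = 3, 2000
pz = np.full(n_classes, 1 / n_classes)       # training (source) prevalence
true_q = np.array([0.7, 0.2, 0.1])           # target prevalence to be recovered

# simulate reasonably informative test posteriors, biased towards the true label
labels = rng.choice(n_classes, size=n_test, p=true_q)
test_probs = rng.dirichlet(np.ones(n_classes), size=n_test)
test_probs[np.arange(n_test), labels] += 3.0
test_probs /= test_probs.sum(axis=1, keepdims=True)

train_probs = rng.dirichlet(np.ones(n_classes), size=500)   # not used by the code above beyond logging
qz_hat, lam = mapls(train_probs, test_probs, pz, return_lambda=True)
print('estimated prevalence:', np.round(qz_hat, 3), 'lambda:', round(float(lam), 3))
print('implied Dirichlet concentration:', alpha0_from_lamda(lam, n_test=n_test, n_classes=n_classes))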

View File

@ -1,8 +1,13 @@
import os
from functools import lru_cache
from pathlib import Path
from jax import numpy as jnp
from sklearn.base import BaseEstimator
import error
import functional as F
import quapy as qp
import numpy as np
@ -80,3 +85,61 @@ class KDEyILR(KDEyML):
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
random_state=random_state, kernel='ilr'
)
class ILRtransformation(F.CompositionalTransformation):
def __init__(self, jax_mode=False):
self.jax_mode = jax_mode
def array(self, X):
if self.jax_mode:
return jnp.array(X)
else:
return np.asarray(X)
def __call__(self, X):
X = self.array(X)
X = qp.error.smooth(X, self.EPSILON)
k = X.shape[-1]
V = self.array(self.get_V(k))
logp = jnp.log(X) if self.jax_mode else np.log(X)
return logp @ V.T
def inverse(self, Z):
Z = self.array(Z)
k_minus_1 = Z.shape[-1]
k = k_minus_1 + 1
V = self.array(self.get_V(k))
logp = Z @ V
p = jnp.exp(logp) if self.jax_mode else np.exp(logp)
p = p / jnp.sum(p, axis=-1, keepdims=True) if self.jax_mode else p / np.sum(p, axis=-1, keepdims=True)
return p
@lru_cache(maxsize=None)
def get_V(self, k):
def helmert_matrix(k):
"""
Returns the (k x k) Helmert matrix.
"""
H = np.zeros((k, k))
for i in range(1, k):
H[i, :i] = 1
H[i, i] = -(i)
H[i] = H[i] / np.sqrt(i * (i + 1))
# row 0 stays zeros; will be discarded
return H
def ilr_basis(k):
"""
Constructs an orthonormal ILR basis using the Helmert submatrix.
Output shape: (k-1, k)
"""
H = helmert_matrix(k)
V = H[1:, :] # remove first row of zeros
return V
return ilr_basis(k)
def in_simplex(x):
return np.all(x >= 0) and np.isclose(x.sum(), 1)
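A standalone numpy restatement of the transform above (no quapy dependencies; smoothing omitted, so inputs are assumed strictly positive) can be used to verify the ILR round trip:

import numpy as np

def helmert_sub(k):
    # rows 1..k-1 of the Helmert matrix: an orthonormal basis of the hyperplane orthogonal to the ones vector
    H = np.zeros((k, k))
    for i in range(1, k):
        H[i, :i] = 1
        H[i, i] = -i
        H[i] /= np.sqrt(i * (i + 1))
    return H[1:]

def ilr(p, V):
    return np.log(p) @ V.T            # (k,) -> (k-1,) unconstrained coordinates

def ilr_inv(z, V):
    p = np.exp(z @ V)                 # recovers p up to a multiplicative constant
    return p / p.sum(axis=-1, keepdims=True)

p = np.array([0.6, 0.3, 0.1])
V = helmert_sub(len(p))
assert np.allclose(ilr_inv(ilr(p, V), V), p)   # the closure absorbs the constant offset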

View File

@ -4,10 +4,11 @@ from sklearn.linear_model import LogisticRegression as LR
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy._bayesian_mapls import BayesianMAPLS
from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR
from BayesianKDEy.temperature_calibration import temp_calibration
from quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
@ -65,6 +66,20 @@ def methods():
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield 'BayEMQ', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=None, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T01', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=0.1, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T10000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=10000, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T100000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=100000,
# exact_train_prev=True), multiclass_method
# yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
yield 'BayEMQ-U-Temp*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp1', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp10', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=10, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp100', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=100, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp1000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1000, exact_train_prev=True), multiclass_method
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@ -101,6 +116,7 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
t_init = time()
withconf_quantifier = withconf_constructor(best_hyperparams)
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
print('calibrating temperature')
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
withconf_quantifier.temperature = temperature
@ -111,7 +127,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
train_prevalence = training.prevalence()
results = defaultdict(list)
test_generator = UPP(test, repeats=100, random_state=0)
for i, (sample_X, true_prevalence) in tqdm(enumerate(test_generator()), total=test_generator.total(), desc=f'{method_name} predictions'):
pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
for i, (sample_X, true_prevalence) in pbar:
t_init = time()
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
ttime = time()-t_init
@ -125,6 +142,8 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
results['test-time'].append(ttime)
results['samples'].append(region.samples)
pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
report = {
'optim_hyper': best_hyperparams,
'train_time': tr_time,
@ -162,7 +181,7 @@ if __name__ == '__main__':
)
print(f'dataset={data_name}, '
f'method={method_name}: '
f'mae={report["results"]["ae"].mean():.3f}, '
f'mae={report["results"]["ae"].mean():.5f}, '
f'coverage={report["results"]["coverage"].mean():.5f}, '
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')

View File

@ -90,10 +90,15 @@ methods = ['BayesianACC', #'BayesianKDEy',
# 'BaKDE-Ait-numpyro',
# 'BaKDE-Ait-numpyro-T*',
'BaKDE-Ait-numpyro-T*-U',
'BootstrapACC',
'BootstrapHDy',
'BootstrapKDEy',
'BootstrapEMQ'
'BayEMQ-U-Temp1-2',
'BayEMQ-U-Temp*',
# 'BayEMQ*2Temp1',
# 'BayEMQ*2Temp*'
# 'BootstrapACC',
# 'BootstrapHDy',
# 'BootstrapKDEy',
# 'BootstrapEMQ'
]
def nicer(name:str):
@ -193,7 +198,7 @@ for setup in ['multiclass']:
pv = pd.pivot_table(
df, index='dataset', columns='method', values=[
# f'amperr-{region}',
f'a-{region}',
# f'a-{region}',
f'c-{region}',
# f'w-{region}',
'ae',

View File

@ -17,7 +17,7 @@ from full_experiments import model_selection
from itertools import chain
def select_imbalanced_datasets(top_m=5):
def select_imbalanced_datasets(top_m=10):
datasets_prevs = []
# choose top-m imbalanced datasets
for data_name in multiclass['datasets']:
@ -107,7 +107,7 @@ def experiment(dataset: Dataset,
run_test(test, alpha_test_informative, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
# informative prior
alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.5) * concentration
alpha_test_wrong = antagonistic_prevalence(train_prev, strength=0.25) * concentration
prior_type = 'wrong'
run_test(test, alpha_test_wrong, alpha_train, concentration, prior_type, bay_quant, train_prev, dataset_name, method_name, results)
@ -267,7 +267,7 @@ def coverage_vs_amplitude_plot(df, save_path=None):
if __name__ == '__main__':
result_dir = Path('./results/prior_effect')
selected = select_imbalanced_datasets()
selected = select_imbalanced_datasets(10)
print(f'selected datasets={selected}')
qp.environ['SAMPLE_SIZE'] = multiclass['sample_size']
reports = []