diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 16401bf..37fd280 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
@@ -40,7 +40,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
-                 explore_CLR=False,
+                 explore='simplex',
                  step_size=0.05,
                  verbose: bool = False):
@@ -48,6 +48,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_warmup=} must be a positive integer')
         if num_samples <= 0:
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
+        assert explore in ['simplex', 'clr', 'ilr'], \
+            f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
 
         super().__init__(classifier, fit_classifier, val_split)
         self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@@ -57,7 +59,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
-        self.explore_CLR = explore_CLR
+        self.explore = explore
         self.step_size = step_size
         self.verbose = verbose
 
@@ -112,16 +114,24 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def sample_neighbour(prev, step_size):
             # random-walk Metropolis-Hastings
             d = len(prev)
-            if not self.explore_CLR:
+            neighbour = None
+            if self.explore=='simplex':
                 dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
                 neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
-            else:
+            elif self.explore=='clr':
                 clr = CLRtransformation()
                 clr_point = clr(prev)
                 dir_noise = rng.normal(scale=step_size, size=d)
                 clr_neighbour = clr_point+dir_noise
                 neighbour = clr.inverse(clr_neighbour)
                 assert in_simplex(neighbour), 'wrong CLR transformation'
+            elif self.explore=='ilr':
+                ilr = ILRtransformation()
+                ilr_point = ilr(prev)
+                dir_noise = rng.normal(scale=step_size, size=d)
+                ilr_neighbour = ilr_point + dir_noise
+                neighbour = ilr.inverse(ilr_neighbour)
+                assert in_simplex(neighbour), 'wrong ILR transformation'
             return neighbour
 
         n_classes = X_probs.shape[1]
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 9f83f29..d821cb5 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -61,8 +61,9 @@ def methods():
     yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
     yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
     yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper), multiclass_method
-    yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.05, **hyper), multiclass_method
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
+    # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
+    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), multiclass_method
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index b8127e3..b9b0e4e 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -105,6 +105,8 @@ for setup in ['binary', 'multiclass']:
             df = df[df["dataset"] != data_name]
 
     for region in ['CI', 'CE', 'CLR', 'ILR']:
+        if setup == 'binary' and region=='ILR':
+            continue
         pv = pd.pivot_table(
             df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
         )
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index 6cbe8a7..fb3be01 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -48,7 +48,7 @@ def method():
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
     return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
-                                                                                   explore_CLR=True,
+                                                                                   explore='clr',
                                                                                    step_size=.15,
                                                                                    # num_warmup = 5000,
                                                                                    # num_samples = 10_000,
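
Reviewer note: the sketch below illustrates what the new explore='ilr' random-walk proposal computes. It is a minimal, self-contained approximation, not quapy's ILRtransformation (whose internals are outside this diff): it assumes the standard ILR built from a Helmert-style orthonormal basis, and every helper name in it (helmert_basis, ilr, ilr_inv, sample_neighbour_ilr) is hypothetical.

import numpy as np

def helmert_basis(d):
    # (d x d-1) matrix whose columns are orthonormal, zero-sum contrasts;
    # they span the hyperplane {x in R^d : sum(x) = 0} where CLR vectors live
    V = np.zeros((d, d - 1))
    for j in range(1, d):
        V[:j, j - 1] = 1.0 / np.sqrt(j * (j + 1))
        V[j, j - 1] = -j / np.sqrt(j * (j + 1))
    return V

def ilr(p, V):
    # isometric log-ratio: centred log (CLR) followed by projection onto V,
    # giving unconstrained coordinates in R^{d-1}
    logp = np.log(p)
    clr = logp - logp.mean()
    return clr @ V

def ilr_inv(z, V):
    # inverse ILR: rebuild the CLR vector and map back with a softmax
    clr = V @ z
    e = np.exp(clr - clr.max())   # max-shift for numerical stability
    return e / e.sum()

def sample_neighbour_ilr(prev, step_size, rng, V):
    # Gaussian random walk in ILR space; ilr_inv guarantees the proposal
    # lands strictly inside the probability simplex
    z = ilr(prev, V) + rng.normal(scale=step_size, size=V.shape[1])
    return ilr_inv(z, V)

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    d = 4
    V = helmert_basis(d)
    prev = np.full(d, 1 / d)                              # uniform prevalence
    assert np.allclose(ilr_inv(ilr(prev, V), V), prev)   # round-trip check
    neigh = sample_neighbour_ilr(prev, step_size=.15, rng=rng, V=V)
    assert np.isclose(neigh.sum(), 1.0) and (neigh > 0).all()

The practical difference from the 'clr' branch: ILR is a bijection between the open simplex and R^{d-1}, so isotropic noise is used in full, whereas CLR noise of dimension d has a component along the all-ones direction that the softmax inverse silently discards, slightly shrinking the effective step.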