adding experiment with ILR

2025-12-04 20:02:26 +01:00 · 2025-12-04 20:02:26 +01:00 · e8d175106f
parent b180aae16c
commit e8d175106f
4 changed files with 21 additions and 8 deletions
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@ -1,7 +1,7 @@
 from sklearn.base import BaseEstimator
 import numpy as np
 from quapy.method._kdey import KDEBase
-from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation
+from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation, ILRtransformation
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from tqdm import tqdm
 import quapy.functional as F
@ -40,7 +40,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                 mcmc_seed: int = 0,
                 confidence_level: float = 0.95,
                 region: str = 'intervals',
-                 explore_CLR=False,
+                 explore='simplex',
                 step_size=0.05,
                 verbose: bool = False):
@ -48,6 +48,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
            raise ValueError(f'parameter {num_warmup=} must be a positive integer')
        if num_samples <= 0:
            raise ValueError(f'parameter {num_samples=} must be a positive integer')
        assert explore in ['simplex', 'clr', 'ilr'], \
            f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
        super().__init__(classifier, fit_classifier, val_split)
        self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@ -57,7 +59,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        self.mcmc_seed = mcmc_seed
        self.confidence_level = confidence_level
        self.region = region
-        self.explore_CLR = explore_CLR
+        self.explore = explore
        self.step_size = step_size
        self.verbose = verbose
@ -112,16 +114,24 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
        def sample_neighbour(prev, step_size):
            """Propose a neighbour of `prev` for one random-walk Metropolis-Hastings step.

            The proposal space is selected by `self.explore`:
            - 'simplex': Gaussian noise (scale `step_size/sqrt(d)`) is added to `prev`
              and the result is mapped back with `F.normalize_prevalence(..., method='mapsimplex')`.
            - 'clr': Gaussian noise (scale `step_size`) is added to the CLR image of `prev`
              and the result is mapped back with the inverse CLR transformation.
            - 'ilr': same as 'clr' but using the ILR transformation.

            :param prev: current prevalence vector (one entry per class)
            :param step_size: scale of the Gaussian perturbation
            :return: the proposed prevalence vector; None if `self.explore` is none of the
                values above
            """
            # random-walk Metropolis-Hastings
            d = len(prev)
-            if not self.explore_CLR:
+            neighbour = None
            if self.explore=='simplex':
                dir_noise = rng.normal(scale=step_size/np.sqrt(d), size=d)
                neighbour = F.normalize_prevalence(prev + dir_noise, method='mapsimplex')
-            else:
+            elif self.explore=='clr':
                clr = CLRtransformation()
                clr_point = clr(prev)
                dir_noise = rng.normal(scale=step_size, size=d)
                clr_neighbour = clr_point+dir_noise
                neighbour = clr.inverse(clr_neighbour)
                assert in_simplex(neighbour), 'wrong CLR transformation'
            elif self.explore=='ilr':
                ilr = ILRtransformation()
                ilr_point = ilr(prev)
                # ILR coordinates of a d-part composition have d-1 components (presumably what
                # ILRtransformation returns -- confirm), so the noise is sized from the
                # transformed point rather than from d; size=d would not match ilr_point
                dir_noise = rng.normal(scale=step_size, size=np.shape(ilr_point))
                ilr_neighbour = ilr_point + dir_noise
                neighbour = ilr.inverse(ilr_neighbour)
                assert in_simplex(neighbour), 'wrong ILR transformation'
            return neighbour
        n_classes = X_probs.shape[1]
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@ -61,8 +61,9 @@ def methods():
    yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
    yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
-    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper), multiclass_method
+    yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
-    yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.05, **hyper), multiclass_method
+    # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
    yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), multiclass_method
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@ -105,6 +105,8 @@ for setup in ['binary', 'multiclass']:
            df = df[df["dataset"] != data_name]
    for region in ['CI', 'CE', 'CLR', 'ILR']:
        if setup == 'binary' and region=='ILR':
            continue
        pv = pd.pivot_table(
            df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
        )
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@ -48,7 +48,7 @@ def method():
    # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
    # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
-                                                                                   explore_CLR=True,
+                                                                                   explore='clr',
                                                                                   step_size=.15,
                                                                                   # num_warmup = 5000,
                                                                                   # num_samples = 10_000,