adding experiment with ILR

This commit is contained in:
Alejandro Moreo Fernandez 2025-12-04 20:02:26 +01:00
parent b180aae16c
commit e8d175106f
4 changed files with 21 additions and 8 deletions

View File

@ -1,7 +1,7 @@
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
import numpy as np import numpy as np
from quapy.method._kdey import KDEBase from quapy.method._kdey import KDEBase
from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation from quapy.method.confidence import WithConfidenceABC, ConfidenceRegionABC, CLRtransformation, ILRtransformation
from quapy.method.aggregative import AggregativeSoftQuantifier from quapy.method.aggregative import AggregativeSoftQuantifier
from tqdm import tqdm from tqdm import tqdm
import quapy.functional as F import quapy.functional as F
@ -40,7 +40,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
mcmc_seed: int = 0, mcmc_seed: int = 0,
confidence_level: float = 0.95, confidence_level: float = 0.95,
region: str = 'intervals', region: str = 'intervals',
explore_CLR=False, explore='simplex',
step_size=0.05, step_size=0.05,
verbose: bool = False): verbose: bool = False):
@ -48,6 +48,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
raise ValueError(f'parameter {num_warmup=} must be a positive integer') raise ValueError(f'parameter {num_warmup=} must be a positive integer')
if num_samples <= 0: if num_samples <= 0:
raise ValueError(f'parameter {num_samples=} must be a positive integer') raise ValueError(f'parameter {num_samples=} must be a positive integer')
assert explore in ['simplex', 'clr', 'ilr'], \
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
super().__init__(classifier, fit_classifier, val_split) super().__init__(classifier, fit_classifier, val_split)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel) self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@ -57,7 +59,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
self.mcmc_seed = mcmc_seed self.mcmc_seed = mcmc_seed
self.confidence_level = confidence_level self.confidence_level = confidence_level
self.region = region self.region = region
self.explore_CLR = explore_CLR self.explore = explore
self.step_size = step_size self.step_size = step_size
self.verbose = verbose self.verbose = verbose
@ -112,16 +114,24 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
def sample_neighbour(prev, step_size):
    """Propose the next candidate for the random-walk Metropolis-Hastings sampler.

    The current point ``prev`` is perturbed with zero-mean Gaussian noise in the
    space selected by ``self.explore``:

    - ``'simplex'``: noise with scale ``step_size / sqrt(d)`` is added directly to
      ``prev`` and the result is passed through
      ``F.normalize_prevalence(..., method='mapsimplex')``;
    - ``'clr'``: noise with scale ``step_size`` is added to ``CLRtransformation()(prev)``
      and the result is mapped back with the transformation's ``inverse``;
    - ``'ilr'``: same as ``'clr'`` but using ``ILRtransformation``.

    ``self``, ``rng`` and ``in_simplex`` are taken from the enclosing scope.

    :param prev: current prevalence vector (its length ``d`` sets the noise size
        in the ``'simplex'`` mode)
    :param step_size: scale (standard deviation) of the Gaussian perturbation
    :return: the proposed prevalence vector
    :raises ValueError: if ``self.explore`` is not ``'simplex'``, ``'clr'`` or ``'ilr'``
    """
    # random-walk Metropolis-Hastings
    if self.explore == 'simplex':
        d = len(prev)
        dir_noise = rng.normal(scale=step_size / np.sqrt(d), size=d)
        return F.normalize_prevalence(prev + dir_noise, method='mapsimplex')

    if self.explore == 'clr':
        transformation, label = CLRtransformation(), 'CLR'
    elif self.explore == 'ilr':
        transformation, label = ILRtransformation(), 'ILR'
    else:
        # fail loudly instead of silently returning None to the sampler
        raise ValueError(
            f'unexpected value for param explore={self.explore!r}; '
            f'valid ones are "simplex", "clr", and "ilr"'
        )

    point = transformation(prev)
    # The noise is sized from the transformed point, not from len(prev): an ILR
    # representation conventionally has one coordinate fewer than the composition,
    # so drawing len(prev) values would not match its shape. For CLR the two
    # sizes are expected to coincide.
    dir_noise = rng.normal(scale=step_size, size=np.shape(point))
    neighbour = transformation.inverse(point + dir_noise)
    assert in_simplex(neighbour), f'wrong {label} transformation'
    return neighbour
n_classes = X_probs.shape[1] n_classes = X_probs.shape[1]

View File

@ -61,8 +61,9 @@ def methods():
yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.15, **hyper), multiclass_method yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore_CLR=True, step_size=.05, **hyper), multiclass_method # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), multiclass_method
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict): def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):

View File

@ -105,6 +105,8 @@ for setup in ['binary', 'multiclass']:
df = df[df["dataset"] != data_name] df = df[df["dataset"] != data_name]
for region in ['CI', 'CE', 'CLR', 'ILR']: for region in ['CI', 'CE', 'CLR', 'ILR']:
if setup == 'binary' and region=='ILR':
continue
pv = pd.pivot_table( pv = pd.pivot_table(
df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
) )

View File

@ -48,7 +48,7 @@ def method():
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
explore_CLR=True, explore='clr',
step_size=.15, step_size=.15,
# num_warmup = 5000, # num_warmup = 5000,
# num_samples = 10_000, # num_samples = 10_000,