diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index f4da22f..3a202d7 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -43,6 +43,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  region: str = 'intervals',
                  explore='simplex',
                  step_size=0.05,
+                 temperature=1.,
                  verbose: bool = False):
 
         if num_warmup <= 0:
@@ -51,6 +52,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
         assert explore in ['simplex', 'clr', 'ilr'], \
             f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
+        assert temperature>0., f'temperature must be >0'
 
         super().__init__(classifier, fit_classifier, val_split)
         self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
@@ -62,6 +64,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.region = region
         self.explore = explore
         self.step_size = step_size
+        self.temperature = temperature
         self.verbose = verbose
 
     def aggregation_fit(self, classif_predictions, labels):
@@ -99,7 +102,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         def log_likelihood(prev, epsilon=1e-10):
             test_likelihoods = prev @ test_densities
             test_loglikelihood = np.log(test_likelihoods + epsilon)
-            return np.sum(test_loglikelihood)
+            return (1./self.temperature) * np.sum(test_loglikelihood)
 
         # def log_prior(prev):
         #     todo: adapt to arbitrary prior knowledge (e.g., something around training prevalence)
@@ -167,7 +170,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                 recent_accept_rate = np.mean(acceptance_history[-100:])
                 step_size *= np.exp(adapt_rate * (recent_accept_rate - target_acceptance))
                 # step_size = float(np.clip(step_size, min_step, max_step))
-                # print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
+                if i % 100 == 0:
+                    print(f'acceptance-rate={recent_accept_rate*100:.3f}%, step-size={step_size:.5f}')
 
         # remove "warmup" initial iterations
         samples = np.asarray(samples[self.num_warmup:])
diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py
index fb3be01..78a0d92 100644
--- a/BayesianKDEy/single_experiment_debug.py
+++ b/BayesianKDEy/single_experiment_debug.py
@@ -29,7 +29,7 @@ from time import time
 from sklearn.base import clone, BaseEstimator
 
 
-def method():
+def methods():
     """
     Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
    - name: is a str representing the name of the method (e.g., 'BayesianKDEy')
@@ -47,48 +47,41 @@ def method():
     # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True),
     # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper),
 
-    return 'BayKDE*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0,
-                                                                                   explore=True,
-                                                                                   step_size=.15,
-                                                                                   # num_warmup = 5000,
-                                                                                   # num_samples = 10_000,
-                                                                                   # region='ellipse',
-                                                                                   **hyper),
+    for T in [1., 10., 100., 1000.]:
+        yield (f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr,
+               lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, temperature=T, step_size=.15, **hyper))
 
 
 if __name__ == '__main__':
 
     binary = {
-        'datasets': qp.datasets.UCI_BINARY_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
         'sample_size': 500
     }
     multiclass = {
-        'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
         'sample_size': 1000
     }
 
-    result_dir = Path('./results')
-
     setup = multiclass
-    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     data_name = 'digits'
+
+    qp.environ['SAMPLE_SIZE'] = setup['sample_size']
     print(f'dataset={data_name}')
     data = setup['fetch_fn'](data_name)
     is_binary = data.n_classes==2
-    hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
 
-    method_name, method, hyper_params, withconf_constructor = method()
-    hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
-    report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
+    hyper_subdir = Path('./results') / 'hyperparams' / ('binary' if is_binary else 'multiclass')
+    for method_name, method, hyper_params, withconf_constructor in methods():
+        hyper_path = experiment_path(hyper_subdir, data_name, method.__class__.__name__)
+        report = experiment(data, method, method_name, hyper_params, withconf_constructor, hyper_path)
 
-    print(f'dataset={data_name}, '
-          f'method={method_name}: '
-          f'mae={report["results"]["ae"].mean():.3f}, '
-          f'coverage={report["results"]["coverage"].mean():.5f}, '
-          f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
+        print(f'dataset={data_name}, '
+              f'method={method_name}: '
+              f'mae={report["results"]["ae"].mean():.3f}, '
+              f'coverage={report["results"]["coverage"].mean():.5f}, '
+              f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index bb7d9cd..7a845d8 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -5,7 +5,7 @@ from sklearn.metrics import confusion_matrix
 
 import quapy as qp
 import quapy.functional as F
-from functional import CompositionalTransformation, CLRtransformation, ILRtransformation
+from quapy.functional import CompositionalTransformation, CLRtransformation, ILRtransformation
 from quapy.method import _bayesian
 from quapy.data import LabelledCollection
 from quapy.method.aggregative import AggregativeQuantifier, AggregativeCrispQuantifier, AggregativeSoftQuantifier, BinaryAggregativeQuantifier
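
For reference, the temperature parameter introduced above tempers the likelihood: the test log-likelihood is scaled by 1/T, i.e. a power likelihood p(D|prev)^(1/T), so T>1 flattens the target density that the Metropolis-Hastings sampler explores and should widen the sampled credible regions. Below is a minimal sketch of that mechanism, not the actual BayesianKDEy implementation; the helper names (tempered_log_likelihood, mh_accept) and the shapes assumed for test_densities are illustrative only.

import numpy as np

def tempered_log_likelihood(prev, test_densities, temperature, epsilon=1e-10):
    # same computation as the patched log_likelihood: mixture density of the
    # test points under class-wise KDEs, log-summed and scaled by 1/T
    test_likelihoods = prev @ test_densities  # (n_classes,) @ (n_classes, n_test) -> (n_test,)
    return (1. / temperature) * np.sum(np.log(test_likelihoods + epsilon))

def mh_accept(loglik_current, loglik_proposed, rng):
    # Metropolis acceptance test on the tempered log-likelihoods; a larger T
    # shrinks the log-ratio towards 0, so more proposals (including worse
    # ones) are accepted and the retained samples spread out over the simplex
    return np.log(rng.uniform()) < (loglik_proposed - loglik_current)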