diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 908342b..0edd80f 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -1,5 +1,6 @@
 from sklearn.base import BaseEstimator
 import numpy as np
+from sklearn.decomposition import PCA
 from BayesianKDEy.commons import ILRtransformation, in_simplex
 from quapy.method._kdey import KDEBase
@@ -60,6 +61,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
                  temperature=1.,
                  engine='numpyro',
                  prior='uniform',
+                 reduce=None,
                  verbose: bool = False,
                  **kwargs):

@@ -91,13 +93,22 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         self.temperature = temperature
         self.engine = engine
         self.prior = prior
+        self.reduce = reduce
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
+        if self.reduce is not None:
+            self.pca = PCA(n_components=self.reduce)
+            classif_predictions = self.pca.fit_transform(classif_predictions)
+            #print(f'reduce to ', classif_predictions.shape)
         self.mix_densities = self.get_mixture_components(classif_predictions, labels, self.classes_, self.bandwidth, self.kernel)
+        #print('num mix ', len(self.mix_densities))
         return self

     def aggregate(self, classif_predictions: np.ndarray):
+        if hasattr(self, 'pca'):
+            classif_predictions = self.pca.transform(classif_predictions)
+
         if self.engine == 'rw-mh':
             if self.prior != 'uniform':
                 raise RuntimeError('prior is not yet implemented in rw-mh')
@@ -245,6 +256,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         return samples

     def _bayesian_numpyro(self, X_probs):
+        #print("bayesian_numpyro", X_probs.shape)
         kdes = self.mix_densities
         # test_densities = np.asarray(
         #     [self.pdf(kde_i, X_probs, self.kernel) for kde_i in kdes]
@@ -252,12 +264,14 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
         test_log_densities = np.asarray(
             [self.pdf(kde_i, X_probs, self.kernel, log_densities=True) for kde_i in kdes]
         )
-        print(f'min={np.min(test_log_densities)}')
-        print(f'max={np.max(test_log_densities)}')
+        #print(f'min={np.min(test_log_densities)}')
+        #print(f'max={np.max(test_log_densities)}')
+        #print("bayesian_numpyro", test_log_densities.shape)
+        #print("len kdes ", len(kdes))
         # import sys
         # sys.exit(0)

-        n_classes = X_probs.shape[-1]
+        n_classes = len(kdes)
         if isinstance(self.prior, str) and self.prior == 'uniform':
             alpha = [1.] * n_classes
         else:
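Note on the `n_classes = len(kdes)` change above: once `reduce` is active, `X_probs` holds PCA-projected scores, so `X_probs.shape[-1]` gives the number of retained components rather than the number of classes; the number of fitted mixture components (one KDE per class) is the reliable count. For reference, a minimal sketch of how a NumPyro model can consume the precomputed per-class test log-densities; this is an illustration only, not the exact model implemented in `_bayeisan_kdey.py`, and `mixture_model` is a hypothetical name:

    import jax.numpy as jnp
    import numpyro
    import numpyro.distributions as dist
    from jax.scipy.special import logsumexp

    def mixture_model(test_log_densities, alpha):
        # test_log_densities: (n_classes, n_instances) array of log p(x | y=i)
        prev = numpyro.sample('prevalence', dist.Dirichlet(jnp.asarray(alpha)))
        # stable mixture log-likelihood: log sum_i prev_i * p(x | y=i)
        log_lik = logsumexp(jnp.log(prev)[:, None] + test_log_densities, axis=0)
        numpyro.factor('loglik', jnp.sum(log_lik))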
diff --git a/BayesianKDEy/bandwidth_and_dimensionality.py b/BayesianKDEy/bandwidth_and_dimensionality.py
new file mode 100644
index 0000000..200e5e8
--- /dev/null
+++ b/BayesianKDEy/bandwidth_and_dimensionality.py
@@ -0,0 +1,74 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KernelDensity
+import quapy.functional as F
+
+# aitchison=True
+aitchison = False
+clr = F.CLRtransformation()
+
+# h = 0.1
+# dims = list(range(5, 100, 5))
+dims = [10, 28, 100]
+
+center_densities = []
+vertex_densities = []
+
+center_densities_scaled = []
+vertex_densities_scaled = []
+
+
+for n in dims:
+    h0 = 0.4
+    simplex_center = F.uniform_prevalence(n)
+    simplex_vertex = np.asarray([.9] + [.1 / (n - 1)] * (n - 1), dtype=float)
+
+    # KDE trained on a single point (the center)
+    kde = KernelDensity(bandwidth=h0)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities.append(density[0])
+    vertex_densities.append(density[1])
+
+    h1 = h0 * np.sqrt(n / 2)
+    # KDE trained on a single point (the center), with dimension-scaled bandwidth
+    kde = KernelDensity(bandwidth=h1)
+    X = simplex_center[None, :]
+    if aitchison:
+        X = clr(X)
+    kde.fit(X)
+
+    X = np.vstack([simplex_center, simplex_vertex])
+    if aitchison:
+        X = clr(X)
+    density = np.exp(kde.score_samples(X))
+
+    center_densities_scaled.append(density[0])
+    vertex_densities_scaled.append(density[1])
+
+# Plot
+plt.figure(figsize=(6*4, 4*4))
+plt.plot(dims, center_densities, marker='o', label='Center of simplex')
+plt.plot(dims, vertex_densities, marker='s', label='Vertex of simplex')
+plt.plot(dims, center_densities_scaled, marker='o', label='Center of simplex (scaled)')
+plt.plot(dims, vertex_densities_scaled, marker='s', label='Vertex of simplex (scaled)')
+
+plt.xlabel('Number of classes (simplex dimension)')
+# plt.ylim(min(center_densities+vertex_densities), max(center_densities+vertex_densities))
+plt.ylabel('Kernel density')
+plt.yscale('log')  # crucial to see anything meaningful
+plt.title(f'KDE density vs dimension (bandwidth = {h0}) in {"Simplex" if not aitchison else "CLR-space"}')
+plt.legend()
+plt.grid(alpha=0.3)
+
+plt.tight_layout()
+plt.show()
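The script above illustrates why a fixed Gaussian bandwidth degenerates as the number of classes grows. A back-of-envelope check (a sketch, not part of the repository): for a single-point Gaussian KDE, the log-density at the training point itself is -d/2 · log(2πh²), which diverges with the dimension d whenever h < (2π)^(-1/2) ≈ 0.399; this is the "densities become huge" effect mentioned in the CHANGE_LOG entry below. Note that the script's h0 = 0.4 sits almost exactly at this neutral point.

    import numpy as np

    def peak_log_density(h, d):
        # log-density of a single-point Gaussian KDE at its own training point
        return -0.5 * d * np.log(2 * np.pi * h ** 2)

    for d in (10, 28, 100):
        print(d, peak_log_density(0.1, d), peak_log_density(0.4, d))
        # h=0.1 grows linearly in d (huge exp-densities); h=0.4 stays near 0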
diff --git a/BayesianKDEy/commons.py b/BayesianKDEy/commons.py
index 60925a7..3a2b73e 100644
--- a/BayesianKDEy/commons.py
+++ b/BayesianKDEy/commons.py
@@ -4,6 +4,7 @@
 from pathlib import Path

 from jax import numpy as jnp
 from sklearn.base import BaseEstimator
+from sklearn.decomposition import PCA

 import quapy.functional as F
@@ -42,6 +43,57 @@ def antagonistic_prevalence(p, strength=1):
     return p_ant


+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+    def aggregation_fit(self, classif_predictions, labels):
+        if not hasattr(self, '_changed'):
+            def scale_bandwidth(n_classes, beta=0.5):
+                return self.bandwidth * np.power(n_classes, beta)
+            n_classes = len(set(labels))
+            scaled = scale_bandwidth(n_classes)
+            print(f'bandwidth scaling: {self.bandwidth:.4f} => {scaled:.4f}')
+            self.bandwidth = scaled
+            self._changed = True
+        return super().aggregation_fit(classif_predictions, labels)
+"""
+class KDEyScaledB(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyFresh(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+
+class KDEyReduce(KDEyML):
+    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., n_components=10, random_state=None):
+        super().__init__(
+            classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
+            random_state=random_state, kernel='gaussian'
+        )
+        self.n_components = n_components
+
+    def aggregation_fit(self, classif_predictions, labels):
+        self.pca = PCA(n_components=self.n_components)
+        classif_predictions = self.pca.fit_transform(classif_predictions)
+        return super().aggregation_fit(classif_predictions, labels)
+
+    def aggregate(self, posteriors: np.ndarray):
+        posteriors = self.pca.transform(posteriors)
+        return super().aggregate(posteriors)
+
+
 class KDEyCLR(KDEyML):
     def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
         super().__init__(
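A hypothetical usage sketch of the new `KDEyReduce` (synthetic stand-in data; the `fit(X, y)` / `quantify(X)` convention follows the calls used elsewhere in this patch, and assumes the surrounding repository is importable):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from BayesianKDEy.commons import KDEyReduce

    # synthetic stand-in data; any (X, y) with several classes would do
    rng = np.random.default_rng(0)
    X = rng.normal(size=(1000, 20))
    y = rng.integers(0, 15, size=1000)

    q = KDEyReduce(LogisticRegression(), n_components=10, bandwidth=0.1)
    q.fit(X, y)                                      # fits classifier + PCA + per-class KDEs
    estim_prev = q.quantify(rng.normal(size=(500, 20)))

One caveat worth noting: the PCA projection of the posteriors no longer lives in the probability simplex, which is fine for the unconstrained Gaussian kernel used here but would not be compatible with simplex-specific kernels such as the Aitchison one.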
diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 062eb09..a176b59 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -3,9 +3,10 @@ from pathlib import Path
 from sklearn.linear_model import LogisticRegression
 from copy import deepcopy as cp
 import quapy as qp
+from BayesianKDEy.commons import KDEyReduce
 from _bayeisan_kdey import BayesianKDEy
 from _bayesian_mapls import BayesianMAPLS
-from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors
+from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
 # import datasets
 from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
 from temperature_calibration import temp_calibration
@@ -55,6 +56,9 @@ def methods(data_handler: DatasetHandler):
     acc = ACC(Cls(), val_split=val_split)
     hdy = DMy(Cls(), val_split=val_split)
     kde_gau = KDEyML(Cls(), val_split=val_split)
+    kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
+    kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
+    kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
     kde_ait = KDEyCLR(Cls(), val_split=val_split)
     emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)

@@ -71,15 +75,29 @@ def methods(data_handler: DatasetHandler):
     # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
     #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
     # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
     # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
-    yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
+    #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
     # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
     # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method


 def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
     with qp.util.temp_seed(0):
+        if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
+            def scale_bandwidth(bandwidth, n_classes, beta=0.5):
+                return bandwidth * np.power(n_classes, beta)
+            n = dataset.get_training().n_classes
+            grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
+            print('bandwidth scaled')
         print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
         # model selection
         if len(grid)>0:
@@ -108,8 +130,8 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
         if dataset.name.startswith('LeQua'):
             temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
         else:
-            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.]
-        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1)
+            temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
+        temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
         uncertainty_quantifier.temperature = temperature
     else:
         temperature = uncertainty_quantifier.temperature
@@ -179,10 +201,12 @@ if __name__ == '__main__':

     result_dir = RESULT_DIR

-    for data_handler in [CIFAR100Handler, VisualDataHandler]:#, UCIMulticlassHandler,LeQuaHandler]:
+    for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
         for dataset in data_handler.iter():
             qp.environ['SAMPLE_SIZE'] = dataset.sample_size
             print(f'dataset={dataset.name}')
+            #if dataset.name != 'abalone':
+            #    continue

             problem_type = 'binary' if dataset.is_binary() else 'multiclass'
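For concreteness, the `KDEyScaledB` branch in `model_selection` rescales every candidate bandwidth by n_classes^0.5 before the grid search. A quick check of what this does to a typical grid (illustrative values):

    import numpy as np

    def scale_bandwidth(bandwidth, n_classes, beta=0.5):
        # same heuristic as in model_selection: h -> h * n^beta
        return bandwidth * np.power(n_classes, beta)

    print([float(scale_bandwidth(b, 100)) for b in (0.01, 0.05, 0.1)])
    # -> [0.1, 0.5, 1.0]: with 100 classes, the grid is stretched by 10x

Scaled bandwidths can thus reach or exceed 1, which is presumably why the (0,1) assertion on Gaussian bandwidths is commented out in quapy/method/_kdey.py at the end of this patch.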
diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 205b6b8..cb1932f 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import quapy as qp
 from BayesianKDEy.commons import RESULT_DIR
 from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
-from error import dist_aitchison
+from quapy.error import dist_aitchison
 from quapy.method.confidence import ConfidenceIntervals
 from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
 import quapy.functional as F
@@ -27,7 +27,7 @@ methods = ['BayesianACC',
            'BaKDE-Ait-numpyro',
            'BaKDE-Ait-T*',
            'BaKDE-Gau-numpyro',
-           'BaKDE-Gau-T*',
+           'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
            # 'BayEMQ-U-Temp1-2',
            # 'BayEMQ-T*',
            'BayEMQ',
diff --git a/BayesianKDEy/kdey_bandwidth_selection.py b/BayesianKDEy/kdey_bandwidth_selection.py
deleted file mode 100644
index e2606b7..0000000
--- a/BayesianKDEy/kdey_bandwidth_selection.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import numpy as np
-from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
-from sklearn.neighbors import KernelDensity
-from scipy.special import logsumexp
-from sklearn.model_selection import StratifiedKFold, cross_val_predict
-
-
-
-def class_scale_factors(X, y):
-    lambdas = {}
-    scales = []
-    for c in np.unique(y):
-        Xc = X[y == c]
-        cov = np.cov(Xc.T)
-        scale = np.trace(cov)
-        lambdas[c] = scale
-        scales.append(scale)
-
-    mean_scale = np.mean(scales)
-    for c in lambdas:
-        lambdas[c] /= mean_scale
-
-    return lambdas
-
-
-class ClassConditionalKDE:
-    def __init__(self, kernel="gaussian", lambdas=None):
-        self.kernel = kernel
-        self.lambdas = lambdas or {}
-        self.models = {}
-
-    def fit(self, X, y, bandwidth):
-        self.classes_ = np.unique(y)
-        for c in self.classes_:
-            h_c = bandwidth * self.lambdas.get(c, 1.0)
-            kde = KernelDensity(kernel=self.kernel, bandwidth=h_c)
-            kde.fit(X[y == c])
-            self.models[c] = kde
-        return self
-
-    def log_density(self, X):
-        logp = np.column_stack([
-            self.models[c].score_samples(X)
-            for c in self.classes_
-        ])
-        return logp
-
-
-
-def conditional_log_likelihood(logp, y, priors=None):
-    if priors is None:
-        priors = np.ones(logp.shape[1]) / logp.shape[1]
-
-    log_prior = np.log(priors)
-    log_joint = logp + log_prior
-
-    denom = logsumexp(log_joint, axis=1)
-    num = log_joint[np.arange(len(y)), y]
-
-    return np.sum(num - denom)
-
-
-def cv_objective(
-    bandwidth,
-    X,
-    y,
-    lambdas,
-    kernel="gaussian",
-    n_splits=5,
-):
-    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
-    score = 0.0
-
-    for train, test in skf.split(X, y):
-        model = ClassConditionalKDE(kernel=kernel, lambdas=lambdas)
-        model.fit(X[train], y[train], bandwidth)
-
-        logp = model.log_density(X[test])
-        score += conditional_log_likelihood(logp, y[test])
-
-    return score
-
-
-if __name__ == '__main__':
-
-    from BayesianKDEy.datasets import UCIMulticlassHandler
-    from quapy.method.aggregative import KDEyML
-    from quapy.model_selection import GridSearchQ
-    from quapy.evaluation import evaluation_report
-
-    dataset = UCIMulticlassHandler('academic-success')
-    training = dataset.get_training()
-    X, y = training.Xy
-
-    cls = LogisticRegression()
-    P = cross_val_predict(cls, X, y, cv=5, n_jobs=-1, method='predict_proba')
-
-    bandwidths = np.logspace(-3, 0, 50)
-    lambdas = None
-
-    scores = [
-        cv_objective(h, P, y, lambdas)
-        for h in bandwidths
-    ]
-
-    best_h = bandwidths[np.argmax(scores)]
-    print(best_h)
-
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0)
-    train, val_prot = dataset.get_train_valprot_for_modsel()
-    modsel = GridSearchQ(kdey, param_grid={'bandwidth': bandwidths}, protocol=val_prot, n_jobs=-1, verbose=True)
-    modsel.fit(*train.Xy)
-    best_bandwidth = modsel.best_params_['bandwidth']
-    print(best_bandwidth)
-
-    print(f'First experiment, with bandwidth={best_h:.4f}')
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_h)
-    train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
-
-    print(f'Second experiment, with bandwidth={best_bandwidth:.4f}')
-    cls = LogisticRegression()
-    kdey = KDEyML(cls, val_split=5, random_state=0, bandwidth=best_bandwidth)
-    # train, test_prot = dataset.get_train_testprot_for_eval()
-    kdey.fit(*train.Xy)
-    report = evaluation_report(kdey, test_prot, error_metrics=['ae'])
-    print(report.mean(numeric_only=True))
diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 02b4166..9c10d20 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -9,6 +9,7 @@ Change Log 0.2.1
 - Added ReadMe method by Daniel Hopkins and Gary King
 - Internal index in LabelledCollection is now "lazy", and is only constructed if required.
 - Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
+- Improved the numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where the densities obtained with small bandwidths may become huge.

 Change Log 0.2.0
 -----------------
diff --git a/TODO.txt b/TODO.txt
index de40ed9..2e1652b 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -47,13 +47,16 @@ Para quitar el labelledcollection de los métodos:
 - fit_classifier=False:

-
+- [TODO] check whether the KDEyML variant with logsumexp is slower than the original one, or whether we can explore
+    an unconstrained space in which the parameter is already log(prev); maybe also move to cvxq
+- [TODO] why not simplify the epsilon of RAE? In the end, it is only meant to smooth the denominator to avoid division by zero
 - [TODO] document confidence in manuals
 - [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
 - [TODO] Add EDy (an implementation is available at quantificationlib)
 - [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
 - [TODO] add HistNetQ
-- [TODO] add CDE-iteration and Bayes-CDE methods
+- [TODO] add CDE-iteration (https://github.com/Arctickirillas/Rubrication/blob/master/quantification.py#L593 or
+    Schumacher's code) and Bayes-CDE methods
 - [TODO] add Friedman's method and DeBias
 - [TODO] check ignore warning stuff check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 2f60c2b..09df327 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -5,7 +5,7 @@ from sklearn.neighbors import KernelDensity
 import quapy as qp
 from quapy.method.aggregative import AggregativeSoftQuantifier
 import quapy.functional as F
-
+from scipy.special import logsumexp
 from sklearn.metrics.pairwise import rbf_kernel
@@ -29,9 +29,10 @@ class KDEBase:
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert kernel!='gaussian' or (0 < bandwidth < 1), \
-                ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
-                 "since this method models the unit simplex")
+            #assert kernel!='gaussian' or (0 < bandwidth < 1), \
+            #    ("the bandwidth for a Gaussian kernel in KDEy should be in (0,1), "
+            #     "since this method models the unit simplex")
+            pass
         return bandwidth
@@ -175,14 +176,18 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         :return: a vector of class prevalence estimates
         """
         with qp.util.temp_seed(self.random_state):
-            epsilon = 1e-10
+            epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+            #def neg_loglikelihood(prev):
+            #    prev = np.clip(prev, epsilon, 1.0)
+            #    test_mixture_likelihood = prev @ test_densities
+            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+            #    return -np.sum(test_loglikelihood)
             def neg_loglikelihood(prev):
-                # test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip (prev, test_densities))
-                test_mixture_likelihood = prev @ test_densities
-                test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:, None] + test_log_densities, axis=0)
                 return -np.sum(test_loglikelihood)
             return F.optim_minimize(neg_loglikelihood, n_classes)
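The rewritten objective relies on the identity log(Σᵢ pᵢ·fᵢ) = logsumexp(log pᵢ + log fᵢ), which avoids ever materializing the (possibly astronomically large or vanishingly small) densities. A small sanity check of the identity (a sketch, not part of the patch):

    import numpy as np
    from scipy.special import logsumexp

    rng = np.random.default_rng(0)
    prev = rng.dirichlet(np.ones(3))                 # a prevalence vector
    dens = rng.uniform(0.1, 5.0, size=(3, 7))        # (n_classes, n_instances)

    direct = np.log(prev @ dens)
    stable = logsumexp(np.log(prev)[:, None] + np.log(dens), axis=0)
    print(np.allclose(direct, stable))               # True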