diff --git a/KDEy/kdey_devel.py b/KDEy/kdey_devel.py index fa3713b..0506fde 100644 --- a/KDEy/kdey_devel.py +++ b/KDEy/kdey_devel.py @@ -68,11 +68,11 @@ class KDEyMLauto(KDEyML): current_prevalence, current_bandwidth = self.optim_minimize_both_fine(current_bandwidth, current_prevalence, tr_posteriors, tr_y, te_posteriors, classes) # check converngece - prev_convergence = all(np.isclose(previous_prevalence, current_prevalence, atol=0.0001)) + prev_convergence = all(np.isclose(previous_prevalence, current_prevalence, atol=0.01)) if isinstance(current_bandwidth, float): - band_convergence = np.isclose(previous_bandwidth, current_bandwidth, atol=0.0001) + band_convergence = np.isclose(previous_bandwidth, current_bandwidth, atol=0.001) else: - band_convergence = all(np.isclose(previous_bandwidth, current_bandwidth, atol=0.0001)) + band_convergence = all(np.isclose(previous_bandwidth, current_bandwidth, atol=0.001)) convergence = band_convergence and prev_convergence @@ -94,17 +94,21 @@ class KDEyMLauto(KDEyML): def optim_minimize_bandwidth(self, current_bandwidth, current_prev, tr_posteriors, tr_y, te_posteriors, classes): def neg_loglikelihood_bandwidth(bandwidth): - mix_densities = self.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth[0]) + # bandwidth = bandwidth[0] + mix_densities = self.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth) test_densities = [self.pdf(kde_i, te_posteriors) for kde_i in mix_densities] test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(current_prev, test_densities)) test_loglikelihood = np.log(test_mixture_likelihood + epsilon) - return -np.sum(test_loglikelihood) + nll = -np.sum(test_loglikelihood) + # print(f'\t{bandwidth=:.10f}\t{nll=:.10f}') + return nll - bounds = [(0.00001, 1)] - r = optimize.minimize(neg_loglikelihood_bandwidth, x0=[current_bandwidth], method='SLSQP', bounds=bounds) - print(f'iterations-bandwidth={r.nit}') - assert r.success, 'Process did not converge!' - return r.x[0] + # bounds = [(0.00001, 0.2)] + # r = optimize.minimize(neg_loglikelihood_bandwidth, x0=[current_bandwidth], method='SLSQP', bounds=bounds) + r = optimize.minimize_scalar(neg_loglikelihood_bandwidth, bounds=(0.00001, 0.2)) + # print(f'iterations-bandwidth={r.nit}') + assert r.success, f'Process did not converge! {r.message}' + return r.x def optim_minimize_both(self, current_bandwidth, current_prev, tr_posteriors, tr_y, te_posteriors, classes): n_classes = len(current_prev) @@ -117,7 +121,7 @@ class KDEyMLauto(KDEyML): test_loglikelihood = np.log(test_mixture_likelihood + epsilon) return -np.sum(test_loglikelihood) - bounds = [(0, 1) for _ in range(n_classes)] + [(0.00001, 1)] + bounds = [(0, 1) for _ in range(n_classes)] + [(0.00001, 0.2)] constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x[:n_classes])}) prevalence_bandwidth = np.append(current_prev, current_bandwidth) r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints) diff --git a/KDEy/quantification_evaluation.py b/KDEy/quantification_evaluation.py index 9cc8a8e..bfe8f58 100644 --- a/KDEy/quantification_evaluation.py +++ b/KDEy/quantification_evaluation.py @@ -35,7 +35,7 @@ METHODS = [ ('PACC', PACC(newLR()), wrap_hyper(logreg_grid)), ('EMQ', EMQ(newLR()), wrap_hyper(logreg_grid)), ('KDEy-ML', KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}), - ('KDEy-MLred', KDEyMLred(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}), + # ('KDEy-MLred', KDEyMLred(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}), ('KDEy-ML-scott', KDEyML(newLR(), bandwidth='scott'), wrap_hyper(logreg_grid)), ('KDEy-ML-silver', KDEyML(newLR(), bandwidth='silverman'), wrap_hyper(logreg_grid)), ('KDEy-ML-autoLike', KDEyMLauto2(newLR(), bandwidth='auto', target='likelihood'), wrap_hyper(logreg_grid)), @@ -55,7 +55,7 @@ TRANSDUCTIVE_METHODS = [ #('TKDEy-ML', KDEyMLauto(newLR()), None), # ('TKDEy-MLboth', KDEyMLauto(newLR(), optim='both'), None), # ('TKDEy-MLbothfine', KDEyMLauto(newLR(), optim='both_fine'), None), - # ('TKDEy-ML2', KDEyMLauto(newLR()), None), + ('TKDEy-ML2', KDEyMLauto(newLR(), optim='two_steps'), None), # ('TKDEy-MLike', KDEyMLauto(newLR(), optim='max_likelihood'), None), # ('TKDEy-MLike2', KDEyMLauto(newLR(), optim='max_likelihood2'), None), #('TKDEy-ML3', KDEyMLauto(newLR()), None),