forked from moreo/QuaPy
checking stat significance
parent 01ef81bf25
commit b7b052ef7d
@@ -135,7 +135,7 @@ def new_method(method, **lr_kwargs):
         # where x ~iid r, with r = p_u, and u = (1/n, 1/n, ..., 1/n) the uniform vector
         method_params = {'bandwidth': np.linspace(0.01, 0.2, 20)}
         param_grid = {**method_params, **hyper_LR}
-        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=5000, val_split=10)
+        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=10000, val_split=10)
     elif method == 'DM-HD':
         method_params = {
             'nbins': [4,8,16,32],
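Note: the merged param_grid above is a flat dict of candidate value lists, so the number of configurations visited by model selection is the product of the grid sizes. A minimal sketch of how such a grid expands, using scikit-learn's ParameterGrid; the hyper_LR values below are placeholders, not the ones used in the experiments:

import numpy as np
from sklearn.model_selection import ParameterGrid

# placeholder logistic-regression grid; the repo's hyper_LR may differ
hyper_LR = {'classifier__C': np.logspace(-3, 3, 7), 'classifier__class_weight': ['balanced', None]}
method_params = {'bandwidth': np.linspace(0.01, 0.2, 20)}
param_grid = {**method_params, **hyper_LR}

configs = list(ParameterGrid(param_grid))
print(len(configs))   # 20 bandwidths x 7 values of C x 2 class weights = 280 configurations
print(configs[0])     # one concrete configuration, e.g. {'bandwidth': 0.01, 'classifier__C': 0.001, ...}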
@@ -129,7 +129,8 @@ class KDEy(AggregativeProbabilisticQuantifier):
         elif self.target == 'min_divergence':
             N = self.montecarlo_trials
             rs = self.random_state
-            self.reference_samples = np.vstack([kde_i.sample(N, random_state=rs) for kde_i in self.val_densities])
+            n = data.n_classes
+            self.reference_samples = np.vstack([kde_i.sample(N//n, random_state=rs) for kde_i in self.val_densities])
             self.reference_classwise_densities = np.asarray([self.pdf(kde_j, self.reference_samples) for kde_j in self.val_densities])
             self.reference_density = np.mean(self.reference_classwise_densities, axis=0)  # equiv. to (uniform @ self.reference_classwise_densities)
         elif self.target == 'min_divergence_deprecated':  # the version of the first draft, with n*N presampled, then alpha*N chosen for class
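Note: the change above draws N//n points from each class-wise validation KDE, so the pooled reference sample behaves as a sample from the uniform mixture r mentioned in the comments while keeping the total Monte Carlo budget at roughly montecarlo_trials points. A standalone sketch of the same computation on synthetic posteriors with scikit-learn's KernelDensity, assuming self.pdf is essentially np.exp(kde.score_samples(X)):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
# synthetic 3-class posteriors standing in for the per-class validation posteriors
val_posteriors = [rng.dirichlet(alpha, size=100) for alpha in ([5, 1, 1], [1, 5, 1], [1, 1, 5])]
val_densities = [KernelDensity(bandwidth=0.1).fit(P) for P in val_posteriors]  # one KDE per class

N, n = 10000, len(val_densities)
# N//n draws per class KDE -> the pooled sample follows the uniform mixture r = (1/n) * sum_j p_j
reference_samples = np.vstack([kde_i.sample(N//n, random_state=0) for kde_i in val_densities])
# class-wise densities at every reference point, shape (n, n_reference_points)
reference_classwise_densities = np.asarray([np.exp(kde_j.score_samples(reference_samples))
                                            for kde_j in val_densities])
# density of the uniform mixture r at the reference points
reference_density = reference_classwise_densities.mean(axis=0)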
@@ -170,15 +171,48 @@ class KDEy(AggregativeProbabilisticQuantifier):
         else:
             raise ValueError('only squared HD is currently implemented')
 
+        epsilon = 1e-10
+        qs = test_densities + epsilon
+        rs = self.reference_density + epsilon
+        iw = qs/rs  # importance weights
+        p_class = self.reference_classwise_densities + epsilon
+        fracs = p_class/qs
+
+        def divergence(prev):
+            # ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs
+            ps_div_qs = prev @ fracs
+            return np.mean( f(ps_div_qs) * iw )
+
+        return F.optim_minimize(divergence, n_classes)
+
+    # new version in which we retain all n*N examples (sampled from a mixture with uniform parameter), and then
+    # apply importance sampling (IS). In this version we compute D(q||p_alpha) with IS, and not D(p_alpha||q) as
+    # in the reformulation proposed above
+    def _target_divergence_q__p(self, posteriors):
+        # in this variant we evaluate the divergence using a Montecarlo approach
+        n_classes = len(self.val_densities)
+
+        test_kde = self.get_kde_function(posteriors)
+        test_densities = self.pdf(test_kde, self.reference_samples)
+
+        def f_squared_hellinger(u):
+            return (np.sqrt(u)-1)**2
+
+        # todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway
+        if self.divergence.lower() == 'hd':
+            f = f_squared_hellinger
+        else:
+            raise ValueError('only squared HD is currently implemented')
+
         epsilon = 1e-10
         qs = test_densities + epsilon
         rs = self.reference_density + epsilon
+        p_class = self.reference_classwise_densities + epsilon
+
+        # D(q||p_a) = 1/N sum f(q/p_a) * (p_a / p_u)
         def divergence(prev):
-            ps = prev @ self.reference_classwise_densities + epsilon
-            return np.mean( f(ps/qs) * (qs/rs) )
+            p_a = prev @ p_class
+            return np.mean( f(qs/p_a) * (p_a/rs) )
 
         return F.optim_minimize(divergence, n_classes)
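Note: both divergence targets above approximate a squared-Hellinger divergence between the test density q and the mixture p_alpha = sum_i alpha_i * p_i, reusing the same reference sample drawn from the uniform mixture r and correcting with importance weights; they differ only in the direction of the divergence (D(p_alpha||q) in the reformulated version vs. D(q||p_alpha) in _target_divergence_q__p). A minimal sketch of the two Monte Carlo estimators, written against precomputed density arrays; the names here are illustrative, not the repo's:

import numpy as np

def f_squared_hellinger(u):
    return (np.sqrt(u) - 1)**2

# qs[i]        = q(x_i)    test-KDE density at reference point x_i
# p_class[j,i] = p_j(x_i)  class-wise validation-KDE densities
# rs[i]        = r(x_i)    density of the uniform mixture used to draw the x_i
def divergence_p_alpha__q(prev, qs, p_class, rs, f=f_squared_hellinger):
    # D_f(p_alpha||q) = E_{x~q}[f(p_alpha/q)] ~= (1/N) sum_i f(p_alpha(x_i)/q(x_i)) * q(x_i)/r(x_i)
    p_alpha = prev @ p_class
    return np.mean(f(p_alpha / qs) * (qs / rs))

def divergence_q__p_alpha(prev, qs, p_class, rs, f=f_squared_hellinger):
    # D_f(q||p_alpha) = E_{x~p_alpha}[f(q/p_alpha)] ~= (1/N) sum_i f(q(x_i)/p_alpha(x_i)) * p_alpha(x_i)/r(x_i)
    p_alpha = prev @ p_class
    return np.mean(f(qs / p_alpha) * (p_alpha / rs))

Since the squared Hellinger divergence is symmetric, both estimators target the same population quantity; what changes is the importance-weighting term (q/r vs. p_alpha/r), and hence the variance of the Monte Carlo estimate.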
@@ -1,17 +1,19 @@
 import pickle
+import os
 
-dataset = 'lequa/T1A'
+dataset = 'lequa/T1B'
 for metric in ['mae', 'mrae']:
-    method1 = 'KDEy-closed++'
-    # method2 = 'KDEy-DMhd3+'
-    # method1 = 'KDEy-ML'
-    # method2 = 'KDEy-ML+'
-
-    path = f'../results/{dataset}/{metric}/{method1}.hyper.pkl'
-    obj = pickle.load(open(path, 'rb'))
-    print(method1, obj)
-
-    # path = f'../results/{dataset}/{metric}/{method2}.hyper.pkl'
-    # obj = pickle.load(open(path, 'rb'))
-    # print(method2, obj)
+    print('metric', metric)
+    for method in ['KDEy-DMhd4', 'KDEy-DMhd4+', 'KDEy-DMhd4++']:
+
+        path = f'/home/moreo/QuaPy/distribution_matching/results/{dataset}/{metric}/{method}.hyper.pkl'
+        if os.path.exists(path):
+            obj = pickle.load(open(path, 'rb'))
+            print(method, obj)
+        else:
+            print(f'path {path} does not exist')
+
+    print()