diff --git a/distribution_matching/commons.py b/distribution_matching/commons.py
index a143baf..716768e 100644
--- a/distribution_matching/commons.py
+++ b/distribution_matching/commons.py
@@ -135,7 +135,7 @@ def new_method(method, **lr_kwargs):
         # where x ~iid r, with r = p_u, and u = (1/n, 1/n, ..., 1/n) the uniform vector
         method_params = {'bandwidth': np.linspace(0.01, 0.2, 20)}
         param_grid = {**method_params, **hyper_LR}
-        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=5000, val_split=10)
+        quantifier = KDEy(lr, target='min_divergence', divergence='HD', montecarlo_trials=10000, val_split=10)
     elif method == 'DM-HD':
         method_params = {
             'nbins': [4,8,16,32],
diff --git a/distribution_matching/method_kdey.py b/distribution_matching/method_kdey.py
index fd6fb30..4b64498 100644
--- a/distribution_matching/method_kdey.py
+++ b/distribution_matching/method_kdey.py
@@ -129,7 +129,8 @@ class KDEy(AggregativeProbabilisticQuantifier):
         elif self.target == 'min_divergence':
             N = self.montecarlo_trials
             rs = self.random_state
-            self.reference_samples = np.vstack([kde_i.sample(N, random_state=rs) for kde_i in self.val_densities])
+            n = data.n_classes
+            self.reference_samples = np.vstack([kde_i.sample(N//n, random_state=rs) for kde_i in self.val_densities])
             self.reference_classwise_densities = np.asarray([self.pdf(kde_j, self.reference_samples) for kde_j in self.val_densities])
             self.reference_density = np.mean(self.reference_classwise_densities, axis=0)  # equiv. to (uniform @ self.reference_classwise_densities)
         elif self.target == 'min_divergence_deprecated':  # the version of the first draft, with n*N presampled, then alpha*N chosen for class
@@ -170,15 +171,48 @@ class KDEy(AggregativeProbabilisticQuantifier):
         else:
             raise ValueError('only squared HD is currently implemented')
 
+        epsilon = 1e-10
+        qs = test_densities + epsilon
+        rs = self.reference_density + epsilon
+        iw = qs/rs  # importance weights
+        p_class = self.reference_classwise_densities + epsilon
+        fracs = p_class/qs
+
+        def divergence(prev):
+            # ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs
+            ps_div_qs = prev @ fracs
+            return np.mean( f(ps_div_qs) * iw )
+
+        return F.optim_minimize(divergence, n_classes)
+
+    # new version in which we retain all N examples (sampled from a mixture with uniform parameter), and then
+    # apply importance sampling (IS). In this version we compute D(q||p_alpha) with IS, and not D(p_alpha||q) as
+    # in the reformulation proposed above
+    def _target_divergence_q__p(self, posteriors):
+        # in this variant we evaluate the divergence using a Montecarlo approach
+        n_classes = len(self.val_densities)
+
+        test_kde = self.get_kde_function(posteriors)
+        test_densities = self.pdf(test_kde, self.reference_samples)
+
+        def f_squared_hellinger(u):
+            return (np.sqrt(u)-1)**2
+
+        # TODO: this will fail when self.divergence is a callable; this is not the right place to check it anyway
+        if self.divergence.lower() == 'hd':
+            f = f_squared_hellinger
+        else:
+            raise ValueError('only squared HD is currently implemented')
 
         epsilon = 1e-10
         qs = test_densities + epsilon
         rs = self.reference_density + epsilon
+        p_class = self.reference_classwise_densities + epsilon
 
-
+        # importance sampling: D(q||p_a) = E_r[f(q/p_a) * (p_a/r)] ~= (1/N) sum_i f(q_i/p_a_i) * (p_a_i/r_i), with r = p_u
         def divergence(prev):
-            ps = prev @ self.reference_classwise_densities + epsilon
-            return np.mean( f(ps/qs) * (qs/rs) )
+            p_a = prev @ p_class
+            return np.mean( f(qs/p_a) * (p_a/rs) )
 
         return F.optim_minimize(divergence, n_classes)
 
diff --git a/distribution_matching/tmp/check_hyperparams.py b/distribution_matching/tmp/check_hyperparams.py
index d03e5a0..d5b4f13 100644
--- a/distribution_matching/tmp/check_hyperparams.py
+++ b/distribution_matching/tmp/check_hyperparams.py
@@ -1,17 +1,19 @@
 import pickle
+import os
 
-dataset = 'lequa/T1A'
+dataset = 'lequa/T1B'
 for metric in ['mae', 'mrae']:
-    method1 = 'KDEy-closed++'
-    # method2 = 'KDEy-DMhd3+'
-    # method1 = 'KDEy-ML'
-    # method2 = 'KDEy-ML+'
+    print('metric', metric)
+    for method in ['KDEy-DMhd4', 'KDEy-DMhd4+', 'KDEy-DMhd4++']:
+
+        path = f'/home/moreo/QuaPy/distribution_matching/results/{dataset}/{metric}/{method}.hyper.pkl'
+        if os.path.exists(path):
+            obj = pickle.load(open(path, 'rb'))
+            print(method, obj)
+        else:
+            print(f'path {path} does not exist')
+
+    print()
 
-    path = f'../results/{dataset}/{metric}/{method1}.hyper.pkl'
-    obj = pickle.load(open(path, 'rb'))
-    print(method1, obj)
 
-    # path = f'../results/{dataset}/{metric}/{method2}.hyper.pkl'
-    # obj = pickle.load(open(path, 'rb'))
-    # print(method2, obj)