From 0f4008e18d1ba96f56035ed6243192de95eb8a78 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Mon, 30 Oct 2023 09:41:52 +0100
Subject: [PATCH] switching to devel

---
 distribution_matching/commons.py           |  5 +-
 distribution_matching/lequa_experiments.py | 80 +++++++++++-----------
 laboratory/method_dxs.py                   |  2 +-
 3 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/distribution_matching/commons.py b/distribution_matching/commons.py
index 970dafa..39bf5ec 100644
--- a/distribution_matching/commons.py
+++ b/distribution_matching/commons.py
@@ -8,7 +8,7 @@ from distribution_matching.method_dirichlety import DIRy
 from sklearn.linear_model import LogisticRegression
 from method_kdey_closed_efficient import KDEyclosed_efficient
 
-METHODS  = ['EMQ', 'EMQ-C', 'DM', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+',
+METHODS  = ['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+', 'EMQ-C',
 BIN_METHODS = [x.replace('-OvA', '') for x in METHODS]
 
 
@@ -63,6 +63,9 @@ def new_method(method, **lr_kwargs):
         method_params = {'exact_train_prev': [False], 'recalib': ['bcts']}
         param_grid = {**method_params, **hyper_LR}
         quantifier = EMQ(lr)
+    elif method == 'HDy':
+        param_grid = hyper_LR
+        quantifier = HDy(lr)
     elif method == 'HDy-OvA':
         param_grid = {'binary_quantifier__' + key: val for key, val in hyper_LR.items()}
         quantifier = OneVsAllAggregative(HDy(lr))
diff --git a/distribution_matching/lequa_experiments.py b/distribution_matching/lequa_experiments.py
index 08d800c..1fe127b 100644
--- a/distribution_matching/lequa_experiments.py
+++ b/distribution_matching/lequa_experiments.py
@@ -2,64 +2,64 @@ import pickle
 import numpy as np
 import os
 import pandas as pd
-from distribution_matching.commons import METHODS, new_method, show_results
+from distribution_matching.commons import METHODS, BIN_METHODS, new_method, show_results
 
 import quapy as qp
 from quapy.model_selection import GridSearchQ
 
 
-
 if __name__ == '__main__':
 
-    qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
-    qp.environ['N_JOBS'] = -1
-    for optim in ['mae', 'mrae']:
+    for task in ['T1A', 'T1B']:
+        qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE[task]
+        qp.environ['N_JOBS'] = -1
+        for optim in ['mae', 'mrae']:
 
-        result_dir = f'results/lequa/{optim}'
+            result_dir = f'results/lequa/{task}/{optim}'
 
-        os.makedirs(result_dir, exist_ok=True)
+            os.makedirs(result_dir, exist_ok=True)
 
-        for method in METHODS:
+            for method in (METHODS if task=='T1B' else BIN_METHODS):
 
-            print('Init method', method)
+                print('Init method', method)
 
-            result_path = f'{result_dir}/{method}'
+                result_path = f'{result_dir}/{method}'
 
-            if os.path.exists(result_path+'.csv'):
-                print(f'file {result_path}.csv already exist; skipping')
-                continue
+                if os.path.exists(result_path+'.csv'):
+                    print(f'file {result_path}.csv already exist; skipping')
+                    continue
 
-            with open(result_path+'.csv', 'wt') as csv:
-                csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
+                with open(result_path+'.csv', 'wt') as csv:
+                    csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')
 
-                dataset = 'T1B'
-                train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
-                print(f'init {dataset} #instances: {len(train)}')
-                param_grid, quantifier = new_method(method)
+                    dataset = task
+                    train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
+                    print(f'init {dataset} #instances: {len(train)}')
+                    param_grid, quantifier = new_method(method)
 
-                if param_grid is not None:
-                    modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)
+                    if param_grid is not None:
+                        modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)
 
-                    modsel.fit(train)
-                    print(f'best params {modsel.best_params_}')
-                    print(f'best score {modsel.best_score_}')
-                    pickle.dump(
-                        (modsel.best_params_, modsel.best_score_,),
-                        open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
+                        modsel.fit(train)
+                        print(f'best params {modsel.best_params_}')
+                        print(f'best score {modsel.best_score_}')
+                        pickle.dump(
+                            (modsel.best_params_, modsel.best_score_,),
+                            open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
 
-                    quantifier = modsel.best_model()
-                else:
-                    print('debug mode... skipping model selection')
-                    quantifier.fit(train)
+                        quantifier = modsel.best_model()
+                    else:
+                        print('debug mode... skipping model selection')
+                        quantifier.fit(train)
 
-                report = qp.evaluation.evaluation_report(
-                    quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'],
-                    verbose=True, verbose_error=optim[1:], n_jobs=-1
-                )
-                means = report.mean()
-                report.to_csv(result_path+'.dataframe')
-                csv.write(f'{method}\tLeQua-T1B\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
-                csv.flush()
-                print(means)
+                    report = qp.evaluation.evaluation_report(
+                        quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'],
+                        verbose=True, verbose_error=optim[1:], n_jobs=-1
+                    )
+                    means = report.mean()
+                    report.to_csv(result_path+'.dataframe')
+                    csv.write(f'{method}\tLeQua-{task}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
+                    csv.flush()
+                    print(means)
 
         show_results(result_path)
diff --git a/laboratory/method_dxs.py b/laboratory/method_dxs.py
index f0f0cf9..e5b3bcd 100644
--- a/laboratory/method_dxs.py
+++ b/laboratory/method_dxs.py
@@ -79,7 +79,7 @@ if __name__ == '__main__':
     repeats = 10
     error = 'mae'
 
-    div = 'HD'
+    div = 'topsoe'
 
     # generates tuples (dataset, method, method_name)
     # (the dataset is needed for methods that process the dataset differently)