forked from moreo/QuaPy

commit 9555c4a731 (parent 49a8cf3b0d)

    Adding one plot, the error-by-drift plot... still have to fix the legend, though.
@@ -49,6 +49,8 @@ class Benchmark(ABC):
         makedirs(join(home_dir, 'tables'))
         makedirs(join(home_dir, 'plots'))
 
+        self.train_prevalence = {}
+
     def _run_id(self, method: MethodDescriptor, dataset: str):
         sep = Benchmark.ID_SEPARATOR
         assert sep not in method.id, \
@@ -104,10 +106,34 @@ class Benchmark(ABC):
         Table.LatexPDF(join(self.home_dir, 'tables', 'results.pdf'), list(tables.values()))
 
-    def gen_plots(self):
-        pass
+    def gen_plots(self, results, metrics=None):
+        import matplotlib.pyplot as plt
+        plt.rcParams.update({'font.size': 11})
+
+        if metrics is None:
+            metrics = ['ae']
+
+        for metric in metrics:
+            method_names, true_prevs, estim_prevs, train_prevs = [], [], [], []
+            skip=False
+            for (method, dataset, result) in results:
+                method_names.append(method.name)
+                true_prevs.append(np.vstack(result['true-prev'].values))
+                estim_prevs.append(np.vstack(result['estim-prev'].values))
+                train_prevs.append(self.get_training_prevalence(dataset))
+            if not skip:
+                path = join(self.home_dir, 'plots', f'err_by_drift_{metric}.pdf')
+                qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, train_prevs, error_name=metric, n_bins=20, savepath=path)
 
-    def show_report(self, method, dataset, report: pd.DataFrame):
+    def _show_report(self, method, dataset, report: pd.DataFrame):
         id = method.id
         MAE = report['mae'].mean()
         mae_std = report['mae'].std()
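Note (not part of the commit): a minimal sketch of the data layout that the new gen_plots passes to qp.plot.error_by_drift. Each element of results is a (method, dataset, report) tuple whose report carries per-sample 'true-prev' and 'estim-prev' columns; the toy report, method name, and prevalence values below are invented purely to illustrate the expected shapes.

    import numpy as np
    import pandas as pd
    import quapy as qp

    rng = np.random.default_rng(0)
    n_samples, n_classes = 50, 2

    def random_prev():
        # a random probability vector standing in for a prevalence estimate
        p = rng.random(n_classes)
        return p / p.sum()

    # stand-in for one evaluation report: one row per test sample
    report = pd.DataFrame({
        'true-prev':  [random_prev() for _ in range(n_samples)],
        'estim-prev': [random_prev() for _ in range(n_samples)],
    })

    method_names = ['toy-method']                            # one entry per (method, dataset) result
    true_prevs   = [np.vstack(report['true-prev'].values)]   # each element has shape (n_samples, n_classes)
    estim_prevs  = [np.vstack(report['estim-prev'].values)]
    train_prevs  = [np.array([0.7, 0.3])]                    # training prevalence of the matching dataset

    qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, train_prevs,
                           error_name='ae', n_bins=20, savepath='err_by_drift_ae.pdf')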
@@ -146,19 +172,20 @@ class Benchmark(ABC):
             seed=0,
             asarray=False
         )
-        results += [(method, dataset, result) for (method, dataset), result in zip(pending_job_args, remaining_results)]
+        results += [
+            (method, dataset, result) for (method, dataset), result in zip(pending_job_args, remaining_results)
+        ]
 
         # print results
         for method, dataset, result in results:
-            self.show_report(method, dataset, result)
+            self._show_report(method, dataset, result)
 
         self.gen_tables(results)
-        self.gen_plots()
+        self.gen_plots(results)
 
-    # def gen_plots(self, methods=None):
-    #     if methods is None:
+    @abstractmethod
+    def get_training_prevalence(self, dataset: str):
+        ...
 
     def __add__(self, other: 'Benchmark'):
         return CombinedBenchmark(self, other, self.n_jobs)
@@ -192,6 +219,10 @@ class TypicalBenchmark(Benchmark):
     def get_sample_size(self)-> int:
         ...
 
+    @abstractmethod
+    def get_training(self, dataset:str)-> LabelledCollection:
+        ...
+
     @abstractmethod
     def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset:str)->\
             (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
@@ -212,7 +243,8 @@ class TypicalBenchmark(Benchmark):
 
         with qp.util.temp_seed(random_state):
             # data split
             trModSel, valprotModSel, trEval, teprotEval = self.get_trModsel_valprotModsel_trEval_teprotEval(dataset)
+            self.train_prevalence[dataset] = trEval.prevalence()
 
             # model selection
             modsel = GridSearchQ(
@@ -247,6 +279,12 @@ class TypicalBenchmark(Benchmark):
 
         return report
 
+    def get_training_prevalence(self, dataset: str):
+        if not dataset in self.train_prevalence:
+            training = self.get_training(dataset)
+            self.train_prevalence[dataset] = training.prevalence()
+        return self.train_prevalence[dataset]
+
 
 class UCIBinaryBenchmark(TypicalBenchmark):
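Note (not part of the commit): the method above lazily fills the train_prevalence cache when gen_plots asks for a dataset that was not run in the current session. A minimal sketch of the same pattern in isolation, using an invented toy LabelledCollection:

    import numpy as np
    from quapy.data import LabelledCollection

    # toy dataset: 10 instances, 30% of class 0 and 70% of class 1
    X = np.random.rand(10, 3)
    y = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
    training = LabelledCollection(X, y)

    train_prevalence = {}

    # eager path: run() stores trEval.prevalence() right after the data split
    train_prevalence['toy-dataset'] = training.prevalence()

    # lazy path: recompute on demand for datasets not seen in this session
    if 'other-dataset' not in train_prevalence:
        train_prevalence['other-dataset'] = training.prevalence()

    print(train_prevalence['toy-dataset'])   # -> array([0.3, 0.7])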
@@ -259,6 +297,9 @@ class UCIBinaryBenchmark(TypicalBenchmark):
         testprotModsel = APP(teEval, n_prevalences=21, repeats=100)
         return trModsel, valprotModsel, trEval, testprotModsel
 
+    def get_training(self, dataset:str) -> LabelledCollection:
+        return qp.datasets.fetch_UCIBinaryDataset(dataset).training
+
     def get_sample_size(self) -> int:
         return 100
@@ -284,6 +325,9 @@ class UCIMultiBenchmark(TypicalBenchmark):
         testprotModsel = UPP(teEval, repeats=1000)
         return trModsel, valprotModsel, trEval, testprotModsel
 
+    def get_training(self, dataset:str) -> LabelledCollection:
+        return qp.datasets.fetch_UCIMulticlassDataset(dataset).training
+
     def get_sample_size(self) -> int:
         return 500
@@ -291,6 +335,7 @@ class UCIMultiBenchmark(TypicalBenchmark):
         return 'mae'
 
 
+
 if __name__ == '__main__':
 
     from quapy.benchmarking.typical import *
@@ -259,7 +259,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
 
     if method_order is None:
-        method_order = method_names
+        method_order = np.unique(method_names)
 
     _set_colors(ax, n_methods=len(method_order))
 
@@ -329,10 +329,9 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
 
 
     if show_legend:
-        fig.legend(loc='lower center',
-                   bbox_to_anchor=(1, 0.5),
-                   ncol=(len(method_names)+1)//2)
+        # fig.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=(len(method_order)//2)+1)
+        fig.legend(loc='upper right', bbox_to_anchor=(1, 0.6))
 
     _save_or_show(savepath)
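Note (not part of the commit): the commit message says the legend still needs fixing; the usual matplotlib recipe for a figure-level legend that is not clipped is to anchor it outside the axes and save with a tight bounding box. A standalone sketch with invented data, independent of QuaPy:

    import numpy as np
    import matplotlib.pyplot as plt

    x = np.linspace(0, 1, 50)
    fig, ax = plt.subplots()
    for name in ['method-A', 'method-B', 'method-C']:
        ax.plot(x, np.random.rand(50).cumsum(), label=name)

    ax.set_xlabel('amount of prior probability shift')
    ax.set_ylabel('absolute error')

    # legend centered below the plot, laid out in two columns;
    # fig.legend() collects the handles/labels from all axes automatically
    fig.legend(loc='upper center', bbox_to_anchor=(0.5, -0.02), ncol=2)

    # bbox_inches='tight' makes savefig include the out-of-axes legend
    fig.savefig('err_by_drift_sketch.pdf', bbox_inches='tight')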