forked from moreo/QuaPy
More UCI datasets, improved plots (larger fonts), and an evaluation script that prints numerical results on the command line
This commit is contained in:
parent
e609c262b4
commit
1d89301089
|
@ -0,0 +1,28 @@
|
|||
import quapy as qp
|
||||
import settings
|
||||
import os
|
||||
import pickle
|
||||
from glob import glob
|
||||
import itertools
|
||||
import pathlib
|
||||
|
||||
# Copy the sample size configured in the local settings module into quapy's global environment.
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
|
||||
|
||||
# Directory that evaluate_results globs for '<dataset>-<method>-<error>.pkl' result files.
resultdir = './results'
|
||||
# Wildcard method pattern. NOTE(review): the visible call to evaluate_results passes its own
# `methods` list, so this module-level value looks unused here -- confirm before relying on it.
methods = ['*']
|
||||
|
||||
def evaluate_results(methods, datasets, error_name):
    """Print the error of every stored experiment matching the given patterns.

    For each (method, dataset) pair, result files named
    ``{resultdir}/{dataset}-{method}-{error_name}.pkl`` are located with
    ``glob``, so both names may contain shell-style wildcards. The error is
    recomputed from the stored true and estimated prevalences, and one
    ``<file name>: <error>`` line per matching file is printed, sorted
    alphabetically.

    :param methods: iterable of method names or glob patterns
    :param datasets: iterable of dataset names or glob patterns
    :param error_name: name of the error measure; it is resolved through
        ``qp.error.from_name`` and is also the suffix of the result file names
    """
    error = qp.error.from_name(error_name)
    results_str = []
    for method, dataset in itertools.product(methods, datasets):
        pattern = f'{resultdir}/{dataset}-{method}-{error_name}.pkl'
        for experiment in glob(pattern):
            # Open through a context manager so the handle is closed
            # deterministically instead of being leaked once per file.
            # NOTE(review): unpickling runs arbitrary code; only point this
            # script at result files produced by trusted runs.
            with open(experiment, 'rb') as fin:
                # Each result file stores a 6-tuple; only the first two
                # entries are needed to recompute the error.
                (true_prevalences, estim_prevalences,
                 _tr_prev, _te_prev, _te_prev_estim, _best_params) = pickle.load(fin)
            result = error(true_prevalences, estim_prevalences)
            results_str.append(f'{pathlib.Path(experiment).name}: {result:.3f}')

    for line in sorted(results_str):
        print(line)
|
||||
|
||||
# Report the 'mae' error of every result file whose method part matches 'epacc*mae1k',
# across all datasets ('*'); both strings are interpolated into the glob pattern.
evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
|
|
@ -10,6 +10,7 @@ from os.path import join
|
|||
|
||||
|
||||
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
|
||||
plotext='png'
|
||||
|
||||
resultdir = './results'
|
||||
plotdir = './plots'
|
||||
|
@ -30,7 +31,7 @@ def gather_results(methods, error_name):
|
|||
def plot_error_by_drift(methods, error_name, logscale=False, path=None):
|
||||
print('plotting error by drift')
|
||||
if path is not None:
|
||||
path = join(path, f'error_by_drift_{error_name}.pdf')
|
||||
path = join(path, f'error_by_drift_{error_name}.{plotext}')
|
||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||
qp.plot.error_by_drift(
|
||||
method_names,
|
||||
|
@ -51,9 +52,9 @@ def diagonal_plot(methods, error_name, path=None):
|
|||
if path is not None:
|
||||
path = join(path, f'diag_{error_name}')
|
||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, show_std=False, savepath=path+'_neg.pdf')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, show_std=False, savepath=path+'_neu.pdf')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, show_std=False, savepath=path+'_pos.pdf')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, show_std=False, savepath=f'{path}_neg.{plotext}')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, show_std=False, savepath=f'{path}_neu.{plotext}')
|
||||
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, show_std=False, savepath=f'{path}_pos.{plotext}')
|
||||
|
||||
|
||||
def binary_bias_global(methods, error_name, path=None):
|
||||
|
@ -61,9 +62,9 @@ def binary_bias_global(methods, error_name, path=None):
|
|||
if path is not None:
|
||||
path = join(path, f'globalbias_{error_name}')
|
||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', savepath=path+'_neg.pdf')
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', savepath=path+'_neu.pdf')
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', savepath=path+'_pos.pdf')
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', savepath=f'{path}_neg.{plotext}')
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', savepath=f'{path}_neu.{plotext}')
|
||||
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', savepath=f'{path}_pos.{plotext}')
|
||||
|
||||
|
||||
def binary_bias_bins(methods, error_name, path=None):
|
||||
|
@ -71,24 +72,24 @@ def binary_bias_bins(methods, error_name, path=None):
|
|||
if path is not None:
|
||||
path = join(path, f'localbias_{error_name}')
|
||||
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, savepath=path+'_neg.pdf')
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, savepath=path+'_neu.pdf')
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, savepath=path+'_pos.pdf')
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, savepath=f'{path}_neg.{plotext}')
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, savepath=f'{path}_neu.{plotext}')
|
||||
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}')
|
||||
|
||||
|
||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
||||
new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
|
||||
new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
|
||||
|
||||
# plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
# plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
|
||||
plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
|
||||
|
||||
# diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
# diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||
diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||
|
||||
binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||
|
||||
# binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
# binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||
#binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
|
||||
#binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
nice = {
|
||||
'mae':'AE',
|
||||
|
|
|
@ -10,6 +10,8 @@ from . import model_selection
|
|||
from . import classification
|
||||
from quapy.method.base import isprobabilistic, isaggregative
|
||||
|
||||
__version__ = '0.1'
|
||||
|
||||
environ = {
|
||||
'SAMPLE_SIZE': None,
|
||||
'UNK_TOKEN': '[UNK]',
|
||||
|
@ -18,6 +20,5 @@ environ = {
|
|||
'PAD_INDEX': 1,
|
||||
}
|
||||
|
||||
|
||||
def isbinary(x):
|
||||
return x.binary
|
|
@ -148,7 +148,11 @@ UCI_DATASETS = ['acute.a', 'acute.b',
|
|||
'pageblocks.5',
|
||||
#'phoneme', # <-- I haven't found this one...
|
||||
'semeion',
|
||||
'sonar'] # ongoing...
|
||||
'sonar',
|
||||
'spambase',
|
||||
'spectf',
|
||||
'tictactoe',
|
||||
'transfusion'] # ongoing...
|
||||
|
||||
def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3):
|
||||
|
||||
|
@ -180,8 +184,11 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
'mammographic': 'Mammographic Mass',
|
||||
'pageblocks.5': 'Page Blocks Classification (5)',
|
||||
'semeion': 'Semeion Handwritten Digit (8)',
|
||||
'sonar': 'Sonar, Mines vs. Rocks'
|
||||
|
||||
'sonar': 'Sonar, Mines vs. Rocks',
|
||||
'spambase': 'Spambase Data Set',
|
||||
'spectf': 'SPECTF Heart Data',
|
||||
'tictactoe': 'Tic-Tac-Toe Endgame Database',
|
||||
'transfusion': 'Blood Transfusion Service Center Data Set '
|
||||
}
|
||||
|
||||
# the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use
|
||||
|
@ -208,8 +215,11 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
'mammographic': 'mammographic-masses',
|
||||
'pageblocks.5': 'page-blocks',
|
||||
'semeion': 'semeion',
|
||||
'sonar': 'undocumented/connectionist-bench/sonar'
|
||||
|
||||
'sonar': 'undocumented/connectionist-bench/sonar',
|
||||
'spambase': 'spambase',
|
||||
'spectf': 'spect',
|
||||
'tictactoe': 'tic-tac-toe',
|
||||
'transfusion': 'blood-transfusion'
|
||||
}
|
||||
|
||||
# the filename is the name of the file within the data_folder indexed by the identifier
|
||||
|
@ -219,7 +229,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
'statlog/german': 'german.data-numeric',
|
||||
'mammographic-masses': 'mammographic_masses.data',
|
||||
'page-blocks': 'page-blocks.data.Z',
|
||||
'undocumented/connectionist-bench/sonar': 'sonar.all-data'
|
||||
'undocumented/connectionist-bench/sonar': 'sonar.all-data',
|
||||
'spect': ['SPECTF.train', 'SPECTF.test'],
|
||||
'blood-transfusion': 'transfusion.data'
|
||||
}
|
||||
|
||||
# the filename containing the dataset description (if any)
|
||||
|
@ -228,7 +240,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
'00193': None,
|
||||
'statlog/german': 'german.doc',
|
||||
'mammographic-masses': 'mammographic_masses.names',
|
||||
'undocumented/connectionist-bench/sonar': 'sonar.names'
|
||||
'undocumented/connectionist-bench/sonar': 'sonar.names',
|
||||
'spect': 'SPECTF.names',
|
||||
'blood-transfusion': 'transfusion.names'
|
||||
}
|
||||
|
||||
identifier = identifier_map[dataset_name]
|
||||
|
@ -238,8 +252,9 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
|
||||
URL = f'http://archive.ics.uci.edu/ml/machine-learning-databases/{identifier}'
|
||||
data_dir = join(data_home, 'uci_datasets', identifier)
|
||||
data_path = join(data_dir, filename)
|
||||
download_file_if_not_exists(f'{URL}/{filename}', data_path)
|
||||
if isinstance(filename, str): # filename could be a list of files, in which case it will be processed later
|
||||
data_path = join(data_dir, filename)
|
||||
download_file_if_not_exists(f'{URL}/{filename}', data_path)
|
||||
|
||||
if descfile:
|
||||
try:
|
||||
|
@ -368,11 +383,38 @@ def fetch_UCIDataset(dataset_name, data_home=None, verbose=False, test_split=0.3
|
|||
|
||||
if identifier == 'undocumented/connectionist-bench/sonar':
|
||||
df = pd.read_csv(data_path, header=None, sep=',')
|
||||
print(df)
|
||||
X = df.iloc[:, 0:60].astype(float).values
|
||||
y = df[60].values
|
||||
y = df[60].values
|
||||
y = binarize(y, pos_class='R')
|
||||
|
||||
if identifier == 'spambase':
|
||||
df = pd.read_csv(data_path, header=None, sep=',')
|
||||
X = df.iloc[:, 0:57].astype(float).values
|
||||
y = df[57].values
|
||||
y = binarize(y, pos_class=1)
|
||||
|
||||
if identifier == 'spect':
    # For this identifier ``filename`` is a list (train and test files), so
    # each file is downloaded and parsed separately and the frames are then
    # concatenated into a single dataset.
    dfs = []
    for file in filename:
        data_path = join(data_dir, file)
        # Build the URL from the individual file name; interpolating the
        # whole ``filename`` list would request a resource that does not exist.
        download_file_if_not_exists(f'{URL}/{file}', data_path)
        dfs.append(pd.read_csv(data_path, header=None, sep=','))
    df = pd.concat(dfs)
    X = df.iloc[:, 1:45].astype(float).values  # columns 1..44 are used as features
    y = df[0].values  # column 0 is used as the label
    y = binarize(y, pos_class=0)
|
||||
|
||||
if identifier == 'tic-tac-toe':
|
||||
df = pd.read_csv(data_path, header=None, sep=',')
|
||||
X = df.iloc[:, 0:9].replace('o',0).replace('b',1).replace('x',2).values
|
||||
y = df[9].values
|
||||
y = binarize(y, pos_class='negative')
|
||||
|
||||
if identifier == 'blood-transfusion':
|
||||
df = pd.read_csv(data_path, sep=',')
|
||||
X = df.iloc[:, 0:4].astype(float).values
|
||||
y = df.iloc[:, 4].values
|
||||
y = binarize(y, pos_class=1)
|
||||
|
||||
data = LabelledCollection(X, y)
|
||||
data.stats()
|
||||
|
|
|
@ -5,9 +5,11 @@ import numpy as np
|
|||
from matplotlib import cm
|
||||
|
||||
import quapy as qp
|
||||
from matplotlib.font_manager import FontProperties
|
||||
|
||||
plt.rcParams['figure.figsize'] = [12, 8]
|
||||
plt.rcParams['figure.dpi'] = 200
|
||||
plt.rcParams['font.size'] = 16
|
||||
|
||||
|
||||
def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, savepath=None):
|
||||
|
@ -44,11 +46,11 @@ def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=No
|
|||
|
||||
|
||||
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
|
||||
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.grid()
|
||||
|
||||
method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
|
||||
|
||||
data, labels = [], []
|
||||
for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
|
||||
true_prev = true_prev[:,pos_class]
|
||||
|
|
4
test.py
4
test.py
|
@ -12,8 +12,8 @@ from classification.neural import NeuralClassifierTrainer, CNNnet
|
|||
from method.meta import EPACC
|
||||
from quapy.model_selection import GridSearchQ
|
||||
|
||||
# dataset = qp.datasets.fetch_UCIDataset('sonar', verbose=True)
|
||||
# sys.exit(0)
|
||||
dataset = qp.datasets.fetch_UCIDataset('transfusion', verbose=True)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
qp.environ['SAMPLE_SIZE'] = 500
|
||||
|
|
Loading…
Reference in New Issue