# Forked from moreo/QuaPy (87 lines, 3.3 KiB, Python)
from copy import deepcopy
|
|
|
|
import numpy as np
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
import quapy as qp
|
|
from method.non_aggregative import DMx
|
|
from protocol import APP
|
|
from quapy.method.aggregative import CC, ACC, DMy
|
|
from sklearn.svm import LinearSVC
|
|
|
|
# Set the 'SAMPLE_SIZE' entry of quapy's global environment to 100
# (presumably the number of instances per sample drawn by the APP protocol used below -- confirm against the quapy docs).
qp.environ['SAMPLE_SIZE'] = 100
|
|
# Restrict the experiment to the UCI dataset names from position 10 onward.
DATASETS = qp.datasets.UCI_DATASETS[10:]
|
|
|
|
def fit_eval_task(args):
    """Fit one quantifier and evaluate it on samples drawn from the test set.

    Takes a single tuple so it can be dispatched through ``qp.util.parallel``.

    :param args: tuple ``(model_name, model, train, test)``; ``model`` is
        deep-copied before fitting, so the instance passed in is left untouched.
    :return: tuple ``(model_name, true_prev, estim_prev)`` where the last two
        items are the pair returned by ``qp.evaluation.prediction``.
    """
    model_name, model, train, test = args

    # Everything random happens under a fixed seed so runs are repeatable.
    with qp.util.temp_seed(0):
        quantifier = deepcopy(model)
        quantifier.fit(train)
        protocol = APP(test, repeats=100, random_state=0)
        true_prev, estim_prev = qp.evaluation.prediction(quantifier, protocol)

    return model_name, true_prev, estim_prev
|
|
|
|
|
|
def gen_data():
    """Run every quantifier on every dataset in ``DATASETS`` and collect results.

    For each dataset, the four methods (CC, ACC, HDy, HDx) are fitted and
    evaluated in parallel through ``fit_eval_task``. The outputs are stored in
    four parallel lists, one entry per (dataset, method) pair, in the order in
    which ``qp.util.parallel`` returns them.

    :return: tuple ``(method_names, true_prevs, estim_prevs, tr_prevs)``;
        ``tr_prevs`` holds the training prevalence of the dataset each entry
        belongs to.
    """

    def new_classifier():
        # Base learner shared by the aggregative methods. A
        # LinearSVC(class_weight='balanced') was the alternative considered here.
        return LogisticRegression()

    def candidate_models():
        # Lazily yields (name, quantifier) pairs, each with a fresh classifier.
        yield 'CC', CC(new_classifier())
        yield 'ACC', ACC(new_classifier())
        yield 'HDy', DMy(new_classifier(), val_split=10, nbins=10, n_jobs=-1)
        yield 'HDx', DMx(nbins=10, n_jobs=-1)

    names, trues, estims, train_prevs = [], [], [], []

    for dataset_name in DATASETS:
        collection = qp.datasets.fetch_UCIDataset(dataset_name)
        train, test = collection.train_test
        print(dataset_name, train.X.shape)

        tasks = ((name, model, train, test) for name, model in candidate_models())
        results = qp.util.parallel(fit_eval_task, tasks, seed=0, n_jobs=-1)

        for name, true_prev, estim_prev in results:
            names.append(name)
            trues.append(true_prev)
            estims.append(estim_prev)
            # Repeated once per method so all four lists stay aligned.
            train_prevs.append(train.prevalence())

    return names, trues, estims, train_prevs
|
|
|
|
# Obtain the plot data via qp.util.pickled_resource, giving it the pickle path and gen_data as the producer
# (presumably the pickle is loaded when it exists, and otherwise gen_data is run and its result stored -- confirm).
method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('../quick_experiment/pickled_plot_data.pkl', gen_data)
|
|
|
|
def remove_dataset(dataset_order, num_methods=4):
    """Return the module-level results without the datasets listed in ``dataset_order``.

    The module-level lists ``method_names``, ``true_prevs``, ``estim_prevs`` and
    ``tr_prevs`` are read in lockstep. Entry ``i`` is attributed to the dataset
    at position ``i // num_methods``, and entries whose position appears in
    ``dataset_order`` are dropped.

    :param dataset_order: container of dataset positions to discard.
    :param num_methods: number of consecutive entries belonging to each dataset.
    :return: tuple of four numpy arrays ``(names, true_prevs, estim_prevs,
        tr_prevs)`` holding only the surviving entries.
    """
    records = zip(method_names, true_prevs, estim_prevs, tr_prevs)
    kept = [
        record
        for position, record in enumerate(records)
        if position // num_methods not in dataset_order
    ]

    # Extract each column separately so an empty selection still gives four arrays.
    columns = ([record[col] for record in kept] for col in range(4))
    return tuple(np.asarray(column) for column in columns)
|
|
|
|
# Plot the error-by-drift curves for a single dataset, chosen by its position in DATASETS.
print(DATASETS)

selected = 10

for i in (selected,):
    print(i, DATASETS[i])

    # Discard every dataset position except the selected one.
    all_ = set(range(len(DATASETS)))
    remove_index = sorted(all_.difference({i}))
    sel_names, sel_true, sel_estim, sel_tr = remove_dataset(
        dataset_order=remove_index,
        num_methods=4,
    )

    # Label CC with element 1 of the first kept training-prevalence vector.
    p = sel_tr[0][1]
    sel_names = [('CC$_{' + str(p) + '}$') if name == 'CC' else name for name in sel_names]

    # Alternative plot: binary diagonal for the selected dataset.
    # qp.plot.binary_diagonal(sel_names, sel_true, sel_estim, train_prev=sel_tr[0], show_std=False, savepath=f'./plots/bin_diag_{i}.png')
    qp.plot.error_by_drift(
        sel_names,
        sel_true,
        sel_estim,
        sel_tr,
        n_bins=10,
        savepath=f'./plots/err_drift_{i}.png',
        show_std=True,
        show_density=False,
        title="",
    )
    # Alternative plots over all datasets: global bias and bias by bins.
    # qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
    # qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=3, savepath='./plots/bin_bias_bin.png')
|