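# Pairwise quantification experiment: each quantifier is trained on the data of one
# area and evaluated on every other area; errors are collected in one table per method.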
import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from commons import *
from table import Table
from tqdm import tqdm
import quapy as qp


np.set_printoptions(linewidth=np.inf)

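# base classifier shared by all aggregative quantifiers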
def classifier():
    # return LogisticRegressionCV(class_weight='balanced', Cs=10)
    return LogisticRegressionCV()

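# quantification methods under comparison: the MLPE baseline plus CC, PCC, ACC, and PACC
# (EMQ is imported above but not included in this comparison)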
def quantifiers():
    cls = classifier()
    yield 'MLPE', MLPE()
    yield 'CC', CC(cls)
    yield 'PCC', PCC(cls)
    yield 'ACC', ACC(cls)
    yield 'PACC', PACC(cls)

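# load the survey data, preprocess the covariates, and group the examples by area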
survey_y = './data/survey_y.csv'

Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)

trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)

areas = [Ai for Ai, _, _ in trains]

tables = []
text_outputs = []

benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test sets
methods = [f'tr-{Ai}' for Ai in areas]     # areas on which a quantifier is trained

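# for every quantifier, run the full pairwise train/test protocol and fill one table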
for q_name, q in quantifiers():

    table = Table(name=q_name, benchmarks=benchmarks, methods=methods, stat_test=None, color_mode='global')
    table.format.mean_prec = 4
    table.format.show_std = False
    table.format.sta = False
    table.format.remove_zero = True
    table.with_mean = True

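    # train on area Ai, then quantify the prevalence in every other area Aj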
    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_areas):
        tr = LabelledCollection(Xi, yi)
        q.fit(tr)
        len_tr = len(tr)
        for j, (Aj, Xj, yj) in enumerate(trains):
            if i == j: continue
            te = LabelledCollection(Xj, yj)
            qp.environ["SAMPLE_SIZE"] = len(te)
            pred_prev = q.quantify(te.X)
            true_prev = te.prevalence()
            # err = qp.error.mrae(true_prev, pred_prev)
            err = qp.error.mae(true_prev, pred_prev)
            table.add(benchmark=f'te-{Aj}', method=f'tr-{Ai}', v=err)

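    # add a 'Best' row reporting, for each test area, the lowest error obtained by any training area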
    for test in benchmarks:
        values = table.get_benchmark_values(test)
        table.add(benchmark=test, method='Best', v=min(values))

    tables.append(table)

    text_outputs.append(f'{q_name} got mean {table.all_mean():.5f}, best mean {table.get_method_values("Best").mean():.5f}')

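# render all tables into a single PDF report and dump the per-quantifier summaries to a text file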
Table.LatexPDF('./results/pairwise/doc.pdf', tables)

with open('./results/classifier/output.txt', 'tw') as foo:
    foo.write('\n'.join(text_outputs))