# QuaPy/Census/adjacentconcat_4.py

import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from commons import *
from table import Table
from tqdm import tqdm
import quapy as qp
# Use an unbounded line width so printed NumPy arrays are not wrapped across lines.
np.set_printoptions(linewidth=np.inf)
def classifier():
    """Create the base learner used by the classifier-based quantifiers.

    Returns:
        A new ``LogisticRegressionCV`` instance built with its default arguments.
    """
    learner = LogisticRegressionCV()
    return learner
def quantifiers():
    """Lazily yield ``(name, quantifier)`` pairs for every method under evaluation.

    The first pair is the classifier-free MLPE baseline. Every other quantifier
    is constructed around the *same* classifier instance obtained from a single
    ``classifier()`` call.

    Yields:
        Tuples ``(name, quantifier)`` in the order
        MLPE, CC, PCC, ACC, PACC, SLD (the latter being an ``EMQ`` instance).
    """
    shared_learner = classifier()

    # MLPE takes no classifier.
    yield 'MLPE', MLPE()

    classifier_based = (
        ('CC', CC),
        ('PCC', PCC),
        ('ACC', ACC),
        ('PACC', PACC),
        ('SLD', EMQ),
    )
    for label, quantifier_cls in classifier_based:
        yield label, quantifier_cls(shared_learner)
# ---------------------------------------------------------------------------
# Experiment: for every area, train each quantifier on the concatenation of the
# areas adjacent to it, then measure the MAE of the predicted prevalence on the
# area itself. Results are collected in a table and rendered to a PDF.
# ---------------------------------------------------------------------------

# Load the survey data and preprocess the covariates.
survey_y = './data/survey_y.csv'
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)

# `data` is iterated below as (area, X, y) triples, one per area.
data = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(data)

Madj = AdjMatrix('./data/matrice_adiacenza.csv')

areas = [Ai for Ai, _, _ in data]
q_names = [q_name for q_name, _ in quantifiers()]

# tables = []
text_outputs = []

benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test

# One method per quantifier, e.g., 'PACC-cat' means a PACC quantifier that has
# been trained on the concatenation of the areas adjacent to the test area.
methods = [f'{q_name}-cat' for q_name in q_names]

table = Table(name='adjacentconcat', benchmarks=benchmarks, methods=methods, stat_test=None, color_mode='local')
table.format.mean_prec = 4
table.format.show_std = False
table.format.sta = False
table.format.remove_zero = True

for q_name, q in quantifiers():
    # The table column does not depend on the test area, so build its name once.
    method_name = f'{q_name}-cat'

    for Ai, Xi, yi in tqdm(data, total=n_areas):
        # training: gather every other area that is adjacent to the test area.
        # The adjacency lookup only depends on Ai, so it is done once per test
        # area instead of once per candidate training area.
        adjacent = Madj.get_adjacent(Ai)
        trainings = [
            LabelledCollection(Xj, yj)
            for Aj, Xj, yj in data
            if Aj != Ai and Aj in adjacent
        ]
        print(f'for test Ai={Ai} there should be {adjacent}: len={len(trainings)}')

        # NOTE(review): if none of the adjacent areas is present in `data`,
        # `trainings` is empty -- confirm how LabelledCollection.join behaves then.
        tr = LabelledCollection.join(*trainings)
        q.fit(tr)

        # test: the whole held-out area is treated as a single sample.
        te = LabelledCollection(Xi, yi)
        qp.environ["SAMPLE_SIZE"] = len(te)
        pred_prev = q.quantify(te.X)
        true_prev = te.prevalence()
        err = qp.error.mae(true_prev, pred_prev)

        table.add(benchmark=f'te-{Ai}', method=method_name, v=err)

    # text_outputs.append(f'{q_name} got mean {table.all_mean():.5f}, best mean {table.get_method_values("Best").mean():.5f}')

# Render the results once every quantifier has been evaluated on every area.
Table.LatexPDF('./results/adjacentconcat/doc.pdf', [table])

# with open(f'./results/classifier/output.txt', 'tw') as foo:
#     foo.write('\n'.join(text_outputs))