"""Cross-area classifier evaluation.

For every classifier produced by ``classifiers()``, fit it on the data of one
area and measure its accuracy on each of the other areas. One results table is
built per classifier; all tables are rendered to a PDF and a one-line summary
per classifier is written to a text file.
"""
import os

import numpy as np
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
from sklearn.svm import SVC, LinearSVC
# NOTE: the star import is deliberately kept *before* the `table` and `tqdm`
# imports (original order) so that the explicit imports below win on any
# name clash with whatever `commons` exports.
from commons import *
from table import Table
from tqdm import tqdm

np.set_printoptions(linewidth=np.inf)


def classifiers():
    """Yield ``(name, estimator)`` pairs for every classifier to evaluate.

    The estimators are freshly constructed and unfitted; the caller is
    expected to call ``fit`` on them.
    """
    yield 'LR-opt', LogisticRegressionCV(Cs=10)
    yield 'LR-opt-bal', LogisticRegressionCV(class_weight='balanced', Cs=10)
    yield 'LR-def', LogisticRegression()
    yield 'SVM-linear', LinearSVC()
    yield 'SVM-rbf', SVC(kernel='rbf')


survey_y = './data/survey_y.csv'
results_dir = './results/classifier'

# Create the output directory up front: otherwise a missing directory would
# only surface as a FileNotFoundError at the very end, after every classifier
# has already been trained and evaluated.
os.makedirs(results_dir, exist_ok=True)

# `load_csv`, `Preprocessor` and `get_dataset_by_area` are not defined in this
# file; presumably they come from `commons` via the star import -- TODO confirm.
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)

# Sequence of (area, X, y) triples, one per area.
trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)

areas = [Ai for Ai, _, _ in trains]

tables = []
text_outputs = []

benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test
methods = [f'tr-{Ai}' for Ai in areas]  # areas on which a classifier is trained

for cls_name, c in classifiers():
    table = Table(
        name=cls_name,
        benchmarks=benchmarks,
        methods=methods,
        stat_test=None,
        color_mode='local',
        lower_is_better=False,
    )
    table.format.mean_prec = 4
    table.format.show_std = False
    table.format.stat_test = False
    table.format.remove_zero = True

    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_areas):
        # The same estimator object is re-fitted for every training area.
        c.fit(Xi, yi)
        for j, (Aj, Xj, yj) in enumerate(trains):
            if i == j:
                # Never test on the area the classifier was trained on.
                continue
            pred_labels = c.predict(Xj)
            true_labels = yj
            # Accuracy: fraction of predictions that match the true labels.
            acc = (pred_labels == true_labels).mean()
            table.add(benchmark=f'te-{Aj}', method=f'tr-{Ai}', v=acc)

    for test in benchmarks:
        # Read the per-benchmark values once, before the summary rows below
        # are added, and compute all three summaries from that same read.
        values = table.get_benchmark_values(test)
        table.add(benchmark=test, method='Best', v=max(values))
        table.add(benchmark=test, method='Worst', v=min(values))
        table.add(benchmark=test, method='AVE', v=np.mean(values))
    tables.append(table)

    # NOTE(review): whether `all_mean()` also averages over the Best/Worst/AVE
    # rows added above depends on `Table` -- confirm this is intended.
    text_outputs.append(f'{cls_name} got mean {table.all_mean():.5f}')

Table.LatexPDF(f'{results_dir}/doc.pdf', tables)
with open(f'{results_dir}/output.txt', 'wt') as fout:
    fout.write('\n'.join(text_outputs))