diff --git a/Census/main.py b/Census/main.py
index 73a040d..55ada7c 100644
--- a/Census/main.py
+++ b/Census/main.py
@@ -5,7 +5,8 @@
 from sklearn.svm import LinearSVC
 from tqdm import tqdm
 import quapy as qp
-from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
+from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS, ACC
 from quapy.data import LabelledCollection
 from sklearn.preprocessing import StandardScaler
 
@@ -73,13 +74,7 @@ class Preprocessor:
         return self.fit(X, y).transform(X)
 
 
-# cls = LinearSVC()
-cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
-q = CC(cls)
-# q = PCC(cls)
-# q = PACC(cls)
-# q = EMQ(cls)
-# q = MS(cls)
+
 
 
 # Ate, Xte = load_csv(cens_y)
@@ -97,27 +92,46 @@
 trains = get_dataset_by_area(Atr, Xtr, ytr)
 n_area = len(trains)
 
-results = np.zeros(shape=(n_area, n_area))
+# cls = LinearSVC()
+cls = LogisticRegression()
+# cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
+# q = CC(cls)
+# q = PCC(cls)
+# q = PACC(cls)
+q = EMQ(cls)
+# q = MS(cls)
+#q = MaximumLikelihoodPrevalenceEstimation()
 
-for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
-    # Xi = preprocessor.fit_transform(Xi)
-    tr = LabelledCollection(Xi, yi)
-    q.fit(tr)
-    len_tr = len(tr)
-    # len_tr = len(big_train)
-    for j, (Aj, Xj, yj) in enumerate(trains):
-        if i==j: continue
-        # Xj = preprocessor.transform(Xj)
-        te = LabelledCollection(Xj, yj)
-        pred_prev = q.quantify(te.X)
-        true_prev = te.prevalence()
-        err = qp.error.mae(true_prev, pred_prev)
-        print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
-        results[i,j] = err
+for q in [CC(cls), PCC(cls), ACC(cls), PACC(cls), EMQ(cls), MLPE()]:
+    results = np.zeros(shape=(n_area, n_area))
 
-print(results)
-print(f'mean results = {results.mean():.4f}')
+    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
+        # Xi = preprocessor.fit_transform(Xi)
+        tr = LabelledCollection(Xi, yi)
+        q.fit(tr)
+        len_tr = len(tr)
+        # len_tr = len(big_train)
+        for j, (Aj, Xj, yj) in enumerate(trains):
+            if i==j: continue
+            # Xj = preprocessor.transform(Xj)
+            te = LabelledCollection(Xj, yj)
+            pred_prev = q.quantify(te.X)
+            true_prev = te.prevalence()
+            # qp.environ["SAMPLE_SIZE"] = len(te)
+            # err = qp.error.mrae(true_prev, pred_prev)
+            err = qp.error.mae(true_prev, pred_prev)
+            print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
+            results[i,j] = err
+
+    import sys; sys.exit()
+
+    q_name = q.__class__.__name__
+    # print(results)
+    print(f'{q_name} mean results = {results.mean():.4f}')
+
+    results += np.eye(results.shape[0])
+    print(results.min(axis=0).mean())
 
 
 
 