Observations and a plan of things to do

This commit is contained in:
Alejandro Moreo Fernandez 2024-03-18 12:19:45 +01:00
parent 3d22270a4d
commit 7ee224521a
1 changed file with 40 additions and 26 deletions

View File

@ -5,7 +5,8 @@ from sklearn.svm import LinearSVC
from tqdm import tqdm from tqdm import tqdm
import quapy as qp import quapy as qp
from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS, ACC
from quapy.data import LabelledCollection from quapy.data import LabelledCollection
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
@ -73,13 +74,7 @@ class Preprocessor:
return self.fit(X, y).transform(X) return self.fit(X, y).transform(X)
# cls = LinearSVC()
cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
q = CC(cls)
# q = PCC(cls)
# q = PACC(cls)
# q = EMQ(cls)
# q = MS(cls)
# Ate, Xte = load_csv(cens_y) # Ate, Xte = load_csv(cens_y)
@ -97,27 +92,46 @@ trains = get_dataset_by_area(Atr, Xtr, ytr)
n_area = len(trains) n_area = len(trains)
results = np.zeros(shape=(n_area, n_area)) # cls = LinearSVC()
cls = LogisticRegression()
# cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
# q = CC(cls)
# q = PCC(cls)
# q = PACC(cls)
q = EMQ(cls)
# q = MS(cls)
#q = MaximumLikelihoodPrevalenceEstimation()
for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area): for q in [CC(cls), PCC(cls), ACC(cls), PACC(cls), EMQ(cls), MLPE()]:
# Xi = preprocessor.fit_transform(Xi)
tr = LabelledCollection(Xi, yi)
q.fit(tr)
len_tr = len(tr)
# len_tr = len(big_train)
for j, (Aj, Xj, yj) in enumerate(trains):
if i==j: continue
# Xj = preprocessor.transform(Xj)
te = LabelledCollection(Xj, yj)
pred_prev = q.quantify(te.X)
true_prev = te.prevalence()
err = qp.error.mae(true_prev, pred_prev)
print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
results[i,j] = err
results = np.zeros(shape=(n_area, n_area))
print(results) for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
print(f'mean results = {results.mean():.4f}') # Xi = preprocessor.fit_transform(Xi)
tr = LabelledCollection(Xi, yi)
q.fit(tr)
len_tr = len(tr)
# len_tr = len(big_train)
for j, (Aj, Xj, yj) in enumerate(trains):
if i==j: continue
# Xj = preprocessor.transform(Xj)
te = LabelledCollection(Xj, yj)
pred_prev = q.quantify(te.X)
true_prev = te.prevalence()
# qp.environ["SAMPLE_SIZE"] = len(te)
# err = qp.error.mrae(true_prev, pred_prev)
err = qp.error.mae(true_prev, pred_prev)
print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
results[i,j] = err
import sys; sys.exit()
q_name = q.__class__.__name__
# print(results)
print(f'{q_name} mean results = {results.mean():.4f}')
results += np.eye(results.shape[0])
print(results.min(axis=0).mean())