observations and a plan of to-do items

2024-03-18 12:19:45 +01:00 · 2024-03-18 12:19:45 +01:00 · 7ee224521a
parent 3d22270a4d
commit 7ee224521a
1 changed files with 40 additions and 26 deletions
--- a/Census/main.py
+++ b/Census/main.py
@ -5,7 +5,8 @@ from sklearn.svm import LinearSVC
 from tqdm import tqdm

 import quapy as qp
-from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS
+from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
+from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS, ACC
 from quapy.data import LabelledCollection
 from sklearn.preprocessing import StandardScaler

@ -73,13 +74,7 @@ class Preprocessor:
        return self.fit(X, y).transform(X)


-# cls = LinearSVC()
-cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
-q = CC(cls)
-# q = PCC(cls)
-# q = PACC(cls)
-# q = EMQ(cls)
-# q = MS(cls)
+


 # Ate, Xte = load_csv(cens_y)
@ -97,27 +92,46 @@ trains = get_dataset_by_area(Atr, Xtr, ytr)

 n_area = len(trains)

-results = np.zeros(shape=(n_area, n_area))
+# cls = LinearSVC()
+cls = LogisticRegression()
+# cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
+# q = CC(cls)
+# q = PCC(cls)
+# q = PACC(cls)
+q = EMQ(cls)
+# q = MS(cls)
+# q = MLPE()

-for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
-    # Xi = preprocessor.fit_transform(Xi)
-    tr = LabelledCollection(Xi, yi)
-    q.fit(tr)
-    len_tr = len(tr)
-    # len_tr = len(big_train)
-    for j, (Aj, Xj, yj) in enumerate(trains):
-        if i==j: continue
-        # Xj = preprocessor.transform(Xj)
-        te = LabelledCollection(Xj, yj)
-        pred_prev = q.quantify(te.X)
-        true_prev = te.prevalence()
-        err = qp.error.mae(true_prev, pred_prev)
-        print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
-        results[i,j] = err
+for q in [CC(cls), PCC(cls), ACC(cls), PACC(cls), EMQ(cls), MLPE()]:

+    results = np.zeros(shape=(n_area, n_area))

-print(results)
-print(f'mean results = {results.mean():.4f}')
+    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
+        # Xi = preprocessor.fit_transform(Xi)
+        tr = LabelledCollection(Xi, yi)
+        q.fit(tr)
+        len_tr = len(tr)
+        # len_tr = len(big_train)
+        for j, (Aj, Xj, yj) in enumerate(trains):
+            if i==j: continue
+            # Xj = preprocessor.transform(Xj)
+            te = LabelledCollection(Xj, yj)
+            pred_prev = q.quantify(te.X)
+            true_prev = te.prevalence()
+            # qp.environ["SAMPLE_SIZE"] = len(te)
+            # err = qp.error.mrae(true_prev, pred_prev)
+            err = qp.error.mae(true_prev, pred_prev)
+            print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
+            results[i,j] = err
+
+    import sys; sys.exit()
+
+    q_name = q.__class__.__name__
+    # print(results)
+    print(f'{q_name} mean results = {results.mean():.4f}')
+
+    results += np.eye(results.shape[0])
+    print(results.min(axis=0).mean())