Observations and a plan of things to do

This commit is contained in:
Alejandro Moreo Fernandez 2024-03-18 12:19:45 +01:00
parent 3d22270a4d
commit 7ee224521a
1 changed file with 40 additions and 26 deletions

View File

@ -5,7 +5,8 @@ from sklearn.svm import LinearSVC
from tqdm import tqdm from tqdm import tqdm
import quapy as qp import quapy as qp
from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS, ACC
from quapy.data import LabelledCollection from quapy.data import LabelledCollection
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
@ -73,13 +74,7 @@ class Preprocessor:
return self.fit(X, y).transform(X) return self.fit(X, y).transform(X)
# cls = LinearSVC()
cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
q = CC(cls)
# q = PCC(cls)
# q = PACC(cls)
# q = EMQ(cls)
# q = MS(cls)
# Ate, Xte = load_csv(cens_y) # Ate, Xte = load_csv(cens_y)
@ -97,27 +92,46 @@ trains = get_dataset_by_area(Atr, Xtr, ytr)
n_area = len(trains) n_area = len(trains)
results = np.zeros(shape=(n_area, n_area)) # cls = LinearSVC()
cls = LogisticRegression()
# cls = LogisticRegressionCV(class_weight='balanced', Cs=10)
# q = CC(cls)
# q = PCC(cls)
# q = PACC(cls)
q = EMQ(cls)
# q = MS(cls)
#q = MaximumLikelihoodPrevalenceEstimation()
for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area): for q in [CC(cls), PCC(cls), ACC(cls), PACC(cls), EMQ(cls), MLPE()]:
# Xi = preprocessor.fit_transform(Xi)
tr = LabelledCollection(Xi, yi)
q.fit(tr)
len_tr = len(tr)
# len_tr = len(big_train)
for j, (Aj, Xj, yj) in enumerate(trains):
if i==j: continue
# Xj = preprocessor.transform(Xj)
te = LabelledCollection(Xj, yj)
pred_prev = q.quantify(te.X)
true_prev = te.prevalence()
err = qp.error.mae(true_prev, pred_prev)
print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
results[i,j] = err
results = np.zeros(shape=(n_area, n_area))
print(results) for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
print(f'mean results = {results.mean():.4f}') # Xi = preprocessor.fit_transform(Xi)
tr = LabelledCollection(Xi, yi)
q.fit(tr)
len_tr = len(tr)
# len_tr = len(big_train)
for j, (Aj, Xj, yj) in enumerate(trains):
if i==j: continue
# Xj = preprocessor.transform(Xj)
te = LabelledCollection(Xj, yj)
pred_prev = q.quantify(te.X)
true_prev = te.prevalence()
# qp.environ["SAMPLE_SIZE"] = len(te)
# err = qp.error.mrae(true_prev, pred_prev)
err = qp.error.mae(true_prev, pred_prev)
print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
results[i,j] = err
import sys; sys.exit()
q_name = q.__class__.__name__
# print(results)
print(f'{q_name} mean results = {results.mean():.4f}')
results += np.eye(results.shape[0])
print(results.min(axis=0).mean())