From a00224015c7b1f3438e7afcdebee5532b73a4ac4 Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Wed, 22 Nov 2023 19:25:12 +0100 Subject: [PATCH] main_test updated --- quacc/main_test.py | 181 ++++++++++++++++++++++----------------------- 1 file changed, 89 insertions(+), 92 deletions(-) diff --git a/quacc/main_test.py b/quacc/main_test.py index 56aa3a0..1a78815 100644 --- a/quacc/main_test.py +++ b/quacc/main_test.py @@ -1,49 +1,95 @@ -from copy import deepcopy from time import time import numpy as np -from quapy.method.aggregative import SLD -from quapy.protocol import APP, UPP -from sklearn.linear_model import LogisticRegression +import scipy.sparse as sp +from quapy.protocol import APP +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.metrics import accuracy_score -import quacc as qc +from baselines.mandoline import estimate_performance from quacc.dataset import Dataset -from quacc.error import acc -from quacc.evaluation.baseline import ref -from quacc.evaluation.method import mulmc_sld -from quacc.evaluation.report import CompReport, EvaluationReport -from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator -from quacc.method.model_selection import GridSearchAE -def test_gs(): +def test_lr(): d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw() classifier = LogisticRegression() classifier.fit(*d.train.Xy) - quantifier = SLD(LogisticRegression()) - # estimator = MultiClassAccuracyEstimator(classifier, quantifier) - estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier) + val, _ = d.validation.split_stratified(0.5, random_state=0) + val_X, val_y = val.X, val.y + val_probs = classifier.predict_proba(val_X) - v_train, v_val = d.validation.split_stratified(0.6, random_state=0) - gs_protocol = UPP(v_val, sample_size=1000, repeats=100) - gs_estimator = GridSearchAE( - model=deepcopy(estimator), - param_grid={ - "q__classifier__C": np.logspace(-3, 3, 7), - "q__classifier__class_weight": [None, "balanced"], - "q__recalib": [None, "bcts", "ts"], - }, - refit=False, - protocol=gs_protocol, - verbose=True, - ).fit(v_train) + reg_X = sp.hstack([val_X, val_probs]) + reg_y = val_probs[np.arange(val_probs.shape[0]), val_y] + reg = LinearRegression() + reg.fit(reg_X, reg_y) - estimator.fit(d.validation) + _test_num = 10000 + test_X = d.test.X[:_test_num, :] + test_probs = classifier.predict_proba(test_X) + test_reg_X = sp.hstack([test_X, test_probs]) + reg_pred = reg.predict(test_reg_X) + + def threshold(pred): + # return np.mean( + # (reg.predict(test_reg_X) >= pred) + # == ( + # test_probs[np.arange(_test_num), d.test.y[:_test_num]] == np.max(test_probs, axis=1) + # ) + # ) + return np.mean( + (reg.predict(test_reg_X) >= pred) + == (np.argmax(test_probs, axis=1) == d.test.y[:_test_num]) + ) + + max_p, max_acc = 0, 0 + for p in reg_pred: + acc = threshold(p) + if acc > max_acc: + max_acc = acc + max_p = p + + print(f"{max_p = }, {max_acc = }") + reg_pred = reg_pred - max_p + 0.5 + print(reg_pred) + print(np.mean(reg_pred >= 0.5)) + print(np.mean(np.argmax(test_probs, axis=1) == d.test.y[:_test_num])) + + +def entropy(probas): + return -np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1) + + +def get_slices(probas): + ln, ncl = probas.shape + preds = np.argmax(probas, axis=1) + pred_slices = np.full((ln, ncl), fill_value=-1, dtype="