From 970008c9f7c9d4d9887fb86143cc1e0b44f4a674 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Thu, 4 Nov 2021 16:09:49 +0100 Subject: [PATCH] testing the class-reweight method on UCI datasets --- NewMethods/ClassWeightQuantification.py | 7 +++- NewMethods/class_weight_model.py | 45 +++++++++++++++++++++++++ quapy/data/base.py | 1 + 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 NewMethods/class_weight_model.py diff --git a/NewMethods/ClassWeightQuantification.py b/NewMethods/ClassWeightQuantification.py index 226b6c2..1b75f3e 100644 --- a/NewMethods/ClassWeightQuantification.py +++ b/NewMethods/ClassWeightQuantification.py @@ -113,6 +113,7 @@ def train_eval(class_weight, test): probabilistic = True +Prompter = PACC # the method creating the very first guess Baseline = PACC if probabilistic else ACC bname = Baseline.__name__ @@ -135,6 +136,10 @@ for ptr in train_prevs: reference_hyperplane = LogisticRegression().fit(*train.Xy) baseline = Baseline(LogisticRegression()).fit(train) + if Baseline != Prompter: + prompter = Prompter(LogisticRegression()).fit(train) + else: + prompter = baseline for pte in test_prevs: test = test_pool.sampling(10000, pte) @@ -145,7 +150,7 @@ for ptr in train_prevs: berrors.append(ae_baseline) # guessed_prevalence = train.prevalence() - guessed_prevalence = prev_estim_acc + guessed_prevalence = prompter.quantify(test.instances) niter=10 last_prev = None diff --git a/NewMethods/class_weight_model.py b/NewMethods/class_weight_model.py new file mode 100644 index 0000000..4b5410f --- /dev/null +++ b/NewMethods/class_weight_model.py @@ -0,0 +1,45 @@ +from sklearn.linear_model import LogisticRegression +import numpy as np + +import quapy as qp +from data import LabelledCollection +from method.base import BaseQuantifier +from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC + + + +class ClassWeightPCC(BaseQuantifier): + + def __init__(self): + self.learner = None + + def fit(self, data: LabelledCollection, fit_learner=True): + self.train = data + self.prompt = PACC(LogisticRegression()).fit(self.train) + return self + + def quantify(self, instances): + guessed_prevalence = self.prompt.quantify(instances) + class_weight = self._get_class_weight(guessed_prevalence) + return PCC(LogisticRegression(class_weight=class_weight)).fit(self.train).quantify(instances) + + def _get_class_weight(self, prevalence): + # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence)) + # return {0: class_weight[1], 1: class_weight[0]} + # weights = prevalence/prevalence.min() + weights = prevalence / self.train.prevalence() + normfactor = weights.min() + if normfactor <= 0: + normfactor = 1E-3 + weights /= normfactor + return {0:weights[0], 1:weights[1]} + + def set_params(self, **parameters): + pass + + def get_params(self, deep=True): + return self.prompt.get_params() + + @property + def classes_(self): + return self.train.classes_ \ No newline at end of file diff --git a/quapy/data/base.py b/quapy/data/base.py index 7a8df5c..775e55d 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -333,6 +333,7 @@ class Dataset: yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})') + def isbinary(data): if isinstance(data, Dataset) or isinstance(data, LabelledCollection): return data.binary