forked from moreo/QuaPy
testing the class-reweight method on UCI datasets
commit 970008c9f7 (parent 3df55f3613)
@@ -113,6 +113,7 @@ def train_eval(class_weight, test):
 
 probabilistic = True
 
+Prompter = PACC # the method creating the very first guess
 Baseline = PACC if probabilistic else ACC
 bname = Baseline.__name__
 
@@ -135,6 +136,10 @@ for ptr in train_prevs:
 
     reference_hyperplane = LogisticRegression().fit(*train.Xy)
     baseline = Baseline(LogisticRegression()).fit(train)
+    if Baseline != Prompter:
+        prompter = Prompter(LogisticRegression()).fit(train)
+    else:
+        prompter = baseline
 
     for pte in test_prevs:
        test = test_pool.sampling(10000, pte)
@@ -145,7 +150,7 @@ for ptr in train_prevs:
        berrors.append(ae_baseline)
 
        # guessed_prevalence = train.prevalence()
-       guessed_prevalence = prev_estim_acc
+       guessed_prevalence = prompter.quantify(test.instances)
 
        niter=10
        last_prev = None
@@ -0,0 +1,45 @@
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+import quapy as qp
+from data import LabelledCollection
+from method.base import BaseQuantifier
+from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC
+
+
+class ClassWeightPCC(BaseQuantifier):
+
+    def __init__(self):
+        self.learner = None
+
+    def fit(self, data: LabelledCollection, fit_learner=True):
+        self.train = data
+        self.prompt = PACC(LogisticRegression()).fit(self.train)
+        return self
+
+    def quantify(self, instances):
+        guessed_prevalence = self.prompt.quantify(instances)
+        class_weight = self._get_class_weight(guessed_prevalence)
+        return PCC(LogisticRegression(class_weight=class_weight)).fit(self.train).quantify(instances)
+
+    def _get_class_weight(self, prevalence):
+        # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
+        # return {0: class_weight[1], 1: class_weight[0]}
+        # weights = prevalence/prevalence.min()
+        weights = prevalence / self.train.prevalence()
+        normfactor = weights.min()
+        if normfactor <= 0:
+            normfactor = 1E-3
+        weights /= normfactor
+        return {0:weights[0], 1:weights[1]}
+
+    def set_params(self, **parameters):
+        pass
+
+    def get_params(self, deep=True):
+        return self.prompt.get_params()
+
+    @property
+    def classes_(self):
+        return self.train.classes_
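For orientation (not part of the commit): a minimal sketch of how the class-reweight logic above behaves and how the new ClassWeightPCC quantifier might be run on a UCI dataset. The dataset name 'yeast', the example prevalence values, and the assumption that ClassWeightPCC is importable in the current scope are illustrative only; the data loader used is QuaPy's fetch_UCIDataset.

import numpy as np
import quapy as qp

# Worked example of the class-weight computation in _get_class_weight:
# divide the guessed test prevalence by the training prevalence and
# rescale so that the smallest weight becomes 1 (values are made up).
train_prev = np.asarray([0.5, 0.5])
guessed_prev = np.asarray([0.2, 0.8])
weights = guessed_prev / train_prev   # -> [0.4, 1.6]
weights /= weights.min()              # -> [1.0, 4.0]
class_weight = {0: weights[0], 1: weights[1]}   # class weights handed to LogisticRegression

# End-to-end usage on a UCI dataset (assumes the ClassWeightPCC class defined above is in scope).
data = qp.datasets.fetch_UCIDataset('yeast')
model = ClassWeightPCC().fit(data.training)
estim_prev = model.quantify(data.test.instances)
print('estimated:', estim_prev, 'true:', data.test.prevalence())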
@@ -333,6 +333,7 @@ class Dataset:
             yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
+
 
 
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
         return data.binary