forked from moreo/QuaPy
testing the class-reweight method on UCI datasets
commit 970008c9f7 (parent 3df55f3613)
@@ -113,6 +113,7 @@ def train_eval(class_weight, test):
 
 probabilistic = True
 
+Prompter = PACC # the method creating the very first guess
 Baseline = PACC if probabilistic else ACC
 bname = Baseline.__name__
 
@@ -135,6 +136,10 @@ for ptr in train_prevs:
 
     reference_hyperplane = LogisticRegression().fit(*train.Xy)
     baseline = Baseline(LogisticRegression()).fit(train)
+    if Baseline != Prompter:
+        prompter = Prompter(LogisticRegression()).fit(train)
+    else:
+        prompter = baseline
 
     for pte in test_prevs:
        test = test_pool.sampling(10000, pte)
@@ -145,7 +150,7 @@ for ptr in train_prevs:
        berrors.append(ae_baseline)
 
        # guessed_prevalence = train.prevalence()
-       guessed_prevalence = prev_estim_acc
+       guessed_prevalence = prompter.quantify(test.instances)
 
        niter=10
        last_prev = None
@@ -0,0 +1,45 @@
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+import quapy as qp
+from data import LabelledCollection
+from method.base import BaseQuantifier
+from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC
+
+
+class ClassWeightPCC(BaseQuantifier):
+
+    def __init__(self):
+        self.learner = None
+
+    def fit(self, data: LabelledCollection, fit_learner=True):
+        self.train = data
+        self.prompt = PACC(LogisticRegression()).fit(self.train)
+        return self
+
+    def quantify(self, instances):
+        guessed_prevalence = self.prompt.quantify(instances)
+        class_weight = self._get_class_weight(guessed_prevalence)
+        return PCC(LogisticRegression(class_weight=class_weight)).fit(self.train).quantify(instances)
+
+    def _get_class_weight(self, prevalence):
+        # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
+        # return {0: class_weight[1], 1: class_weight[0]}
+        # weights = prevalence/prevalence.min()
+        weights = prevalence / self.train.prevalence()
+        normfactor = weights.min()
+        if normfactor <= 0:
+            normfactor = 1E-3
+        weights /= normfactor
+        return {0:weights[0], 1:weights[1]}
+
+    def set_params(self, **parameters):
+        pass
+
+    def get_params(self, deep=True):
+        return self.prompt.get_params()
+
+    @property
+    def classes_(self):
+        return self.train.classes_
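For orientation (not part of the commit): a minimal sketch of how the class-reweight logic above behaves and how the new ClassWeightPCC quantifier might be run on a UCI dataset. The dataset name 'yeast', the example prevalence values, and the assumption that ClassWeightPCC is importable in the current scope are illustrative only; the data loader used is QuaPy's fetch_UCIDataset.

import numpy as np
import quapy as qp

# Worked example of the class-weight computation in _get_class_weight:
# divide the guessed test prevalence by the training prevalence and
# rescale so that the smallest weight becomes 1 (values are made up).
train_prev = np.asarray([0.5, 0.5])
guessed_prev = np.asarray([0.2, 0.8])
weights = guessed_prev / train_prev   # -> [0.4, 1.6]
weights /= weights.min()              # -> [1.0, 4.0]
class_weight = {0: weights[0], 1: weights[1]}   # class weights handed to LogisticRegression

# End-to-end usage on a UCI dataset (assumes the ClassWeightPCC class defined above is in scope).
data = qp.datasets.fetch_UCIDataset('yeast')
model = ClassWeightPCC().fit(data.training)
estim_prev = model.quantify(data.test.instances)
print('estimated:', estim_prev, 'true:', data.test.prevalence())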
@@ -333,6 +333,7 @@ class Dataset:
             yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
+
 
 
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
         return data.binary