forked from moreo/QuaPy
76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
|
from sklearn.calibration import CalibratedClassifierCV
|
||
|
from sklearn.svm import LinearSVC
|
||
|
|
||
|
from NewMethods.fgsld.fine_grained_sld import FineGrainedSLD
|
||
|
from method.aggregative import EMQ, CC
|
||
|
from quapy.data import LabelledCollection
|
||
|
from quapy.method.base import BaseQuantifier
|
||
|
import quapy as qp
|
||
|
import quapy.functional as F
|
||
|
from sklearn.linear_model import LogisticRegression
|
||
|
|
||
|
|
||
|
class FakeFGLSD(BaseQuantifier):
    """Adapter that exposes ``FineGrainedSLD`` through the quantifier interface.

    It lets the fine-grained SLD procedure be fitted and evaluated with the
    same calls (``fit`` / ``quantify``) used for the other quantifiers
    compared in this script.

    Parameters
    ----------
    learner
        Classifier handed to ``FineGrainedSLD``; it is fitted on the training
        data in :meth:`fit`.
    nbins
        Forwarded to ``FineGrainedSLD`` as ``n_bins``.
    isomerous
        Forwarded unchanged to ``FineGrainedSLD.run``.
        NOTE(review): presumably selects equal-population bins -- confirm
        against the FineGrainedSLD implementation.
    """

    def __init__(self, learner, nbins, isomerous):
        self.learner = learner
        self.nbins = nbins
        self.isomerous = isomerous

    def fit(self, data: LabelledCollection):
        """Fit the learner on ``data`` and keep the training set for later use.

        The training instances and labels are stored on the instance because
        ``FineGrainedSLD`` is constructed from them at quantification time.

        Returns
        -------
        self
        """
        self.Xtr, self.ytr = data.Xy
        self.learner.fit(self.Xtr, self.ytr)
        return self

    def quantify(self, instances):
        """Return the prevalence estimate produced by ``FineGrainedSLD``.

        Must be called after :meth:`fit`, which sets ``Xtr`` and ``ytr``.
        """
        # The number of classes is fixed to 2: this adapter is binary-only.
        tr_priors = F.prevalence_from_labels(self.ytr, n_classes=2)
        fgsld = FineGrainedSLD(
            self.Xtr, instances, self.ytr, tr_priors, self.learner, n_bins=self.nbins
        )
        # run() also returns the posteriors; only the priors are reported.
        priors, _ = fgsld.run(self.isomerous)
        return priors

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict.

        Follows the scikit-learn estimator convention so that utilities which
        rebuild an estimator from its parameters receive a mapping instead of
        ``None``. ``deep`` is accepted for interface compatibility; the
        learner's own parameters are not expanded.
        """
        return {
            'learner': self.learner,
            'nbins': self.nbins,
            'isomerous': self.isomerous,
        }

    def set_params(self, **parameters):
        """Update constructor parameters in place.

        Raises
        ------
        ValueError
            If a name is not one of ``learner``, ``nbins`` or ``isomerous``;
            silently ignoring it would make a parameter search look like it
            ran while every configuration was in fact identical.

        Returns
        -------
        self
        """
        valid = self.get_params(deep=False)
        for name, value in parameters.items():
            if name not in valid:
                raise ValueError(
                    f'Invalid parameter {name!r} for {type(self).__name__}; '
                    f'valid parameters are {sorted(valid)}'
                )
            setattr(self, name, value)
        return self
|
||
|
|
||
|
|
||
|
|
||
|
# --- Experiment: CC vs. fine-grained SLD variants vs. SLD (EMQ) on the 'hp' reviews ---

# Number of documents in each test sample drawn by the evaluation protocol.
qp.environ['SAMPLE_SIZE'] = 500

# Load the dataset and replace its raw text with tf-idf features (in place).
dataset = qp.datasets.fetch_reviews('hp')
qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)

training, test = dataset.training, dataset.test

# One calibrated linear SVM instance is shared by every quantifier below.
cls = CalibratedClassifierCV(LinearSVC())

# Quantifiers to compare, in plotting order.
# Variants currently disabled (re-add a line to enable):
#   FakeFGLSD(cls, nbins=5,   isomerous=False) -> 'FGSLD-5'
#   FakeFGLSD(cls, nbins=10,  isomerous=False) -> 'FGSLD-10'
#   FakeFGLSD(cls, nbins=50,  isomerous=False) -> 'FGSLD-50'
#   FakeFGLSD(cls, nbins=100, isomerous=False) -> 'FGSLD-100'
#   FakeFGLSD(cls, nbins=10,  isomerous=True)  -> 'FGSLD-10-ISO'
experiments = [
    ('CC', CC(cls)),
    ('FGSLD-1', FakeFGLSD(cls, nbins=1, isomerous=False)),
    ('FGSLD-2', FakeFGLSD(cls, nbins=2, isomerous=False)),
    ('SLD', EMQ(cls)),
]

# Per-method results, kept as parallel lists for the plotting call.
method_names = []
true_prevs = []
estim_prevs = []
tr_prevs = []

for model_name, model in experiments:
    print('running ', model_name)
    model.fit(training)

    true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
        model,
        test,
        qp.environ['SAMPLE_SIZE'],
        n_repetitions=10,
        n_prevpoints=21,
        n_jobs=-1,
    )

    method_names.append(model_name)
    true_prevs.append(true_prev)
    estim_prevs.append(estim_prev)
    tr_prevs.append(training.prevalence())

# Every method was trained on the same set, so the first training prevalence
# is the one passed to the plot.
qp.plot.binary_diagonal(
    method_names,
    true_prevs,
    estim_prevs,
    train_prev=tr_prevs[0],
    savepath='./plot_fglsd.png',
)
|