From 970008c9f7c9d4d9887fb86143cc1e0b44f4a674 Mon Sep 17 00:00:00 2001
From: Alex Moreo <alejandro.moreo@isti.cnr.it>
Date: Thu, 4 Nov 2021 16:09:49 +0100
Subject: [PATCH] testing the class-reweight method on UCI datasets

---
 NewMethods/ClassWeightQuantification.py |  7 +++-
 NewMethods/class_weight_model.py        | 45 +++++++++++++++++++++++++
 quapy/data/base.py                      |  1 +
 3 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 NewMethods/class_weight_model.py

diff --git a/NewMethods/ClassWeightQuantification.py b/NewMethods/ClassWeightQuantification.py
index 226b6c2..1b75f3e 100644
--- a/NewMethods/ClassWeightQuantification.py
+++ b/NewMethods/ClassWeightQuantification.py
@@ -113,6 +113,7 @@ def train_eval(class_weight, test):
 
 probabilistic = True
 
+Prompter = PACC  # the method creating the very first guess
 Baseline = PACC if probabilistic else ACC
 bname = Baseline.__name__
 
@@ -135,6 +136,10 @@ for ptr in train_prevs:
 
     reference_hyperplane = LogisticRegression().fit(*train.Xy)
     baseline = Baseline(LogisticRegression()).fit(train)
+    if Baseline != Prompter:
+        prompter = Prompter(LogisticRegression()).fit(train)
+    else:
+        prompter = baseline
 
     for pte in test_prevs:
         test = test_pool.sampling(10000, pte)
@@ -145,7 +150,7 @@ for ptr in train_prevs:
         berrors.append(ae_baseline)
 
         # guessed_prevalence = train.prevalence()
-        guessed_prevalence = prev_estim_acc
+        guessed_prevalence = prompter.quantify(test.instances)
 
         niter=10
         last_prev = None
diff --git a/NewMethods/class_weight_model.py b/NewMethods/class_weight_model.py
new file mode 100644
index 0000000..4b5410f
--- /dev/null
+++ b/NewMethods/class_weight_model.py
@@ -0,0 +1,45 @@
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+import quapy as qp
+from data import LabelledCollection
+from method.base import BaseQuantifier
+from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC
+
+
+
+class ClassWeightPCC(BaseQuantifier):
+    """PCC whose classifier is retrained, per test sample, with class weights taken from a PACC guess."""
+    def __init__(self):
+        self.learner = None  # placeholder attribute; not read by any method of this class
+
+    def fit(self, data: LabelledCollection, fit_learner=True):
+        """Keep `data` for later retraining and fit the PACC "prompt" on it (`fit_learner` is unused)."""
+        self.train = data
+        self.prompt = PACC(LogisticRegression()).fit(self.train)
+        return self
+
+    def quantify(self, instances):
+        """Return the PCC prevalence estimate for `instances`; a new weighted model is fit on each call."""
+        guessed_prevalence = self.prompt.quantify(instances)
+        class_weight = self._get_class_weight(guessed_prevalence)
+        return PCC(LogisticRegression(class_weight=class_weight)).fit(self.train).quantify(instances)
+
+    def _get_class_weight(self, prevalence):
+        """Weight each training label by guessed/training prevalence, rescaled by the smallest ratio."""
+        weights = prevalence / self.train.prevalence()
+        smallest = weights.min()
+        weights /= 1E-3 if smallest <= 0 else smallest  # a non-positive minimum cannot be the divisor
+        return dict(zip(self.train.classes_, weights))  # keyed by the real labels, not a fixed 0/1
+
+    def set_params(self, **parameters):
+        """Accept any keyword parameters and ignore them."""
+
+    def get_params(self, deep=True):
+        """Return the parameters reported by the prompt quantifier; only available after `fit`."""
+        return self.prompt.get_params()
+
+    @property
+    def classes_(self):
+        """Class labels of the training collection stored by `fit`."""
+        return self.train.classes_
\ No newline at end of file
diff --git a/quapy/data/base.py b/quapy/data/base.py
index 7a8df5c..775e55d 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -333,6 +333,7 @@ class Dataset:
             yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
 
+
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
         return data.binary