From 24c28edfd93acf56d19d8b0c242cbeb733d10fd3 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo <alejandro.moreo@isti.cnr.it>
Date: Wed, 16 Oct 2024 16:00:23 +0200
Subject: [PATCH] adding scmq

---
 examples/ensembles.py       | 36 ++++++++++++++++++++
 quapy/method/aggregative.py |  1 -
 quapy/method/meta.py        | 67 +++++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 examples/ensembles.py

diff --git a/examples/ensembles.py b/examples/ensembles.py
new file mode 100644
index 0000000..8c0d07e
--- /dev/null
+++ b/examples/ensembles.py
@@ -0,0 +1,36 @@
+"""Compare the SCMQ ensemble against each of its member quantifiers."""
+from sklearn.linear_model import LogisticRegression
+
+import quapy as qp
+from quapy.protocol import UPP
+from quapy.method.aggregative import PACC, DMy, EMQ, KDEyML
+from quapy.method.meta import SCMQ
+
+qp.environ["SAMPLE_SIZE"] = 100
+
+
+def train_and_test_model(quantifier, train, test):
+    """Fit on `train`, evaluate over UPP(test), and print the report's numeric means."""
+    quantifier.fit(train)
+    report = qp.evaluation.evaluation_report(quantifier, UPP(test), error_metrics=['mae', 'mrae'])
+    print(quantifier.__class__.__name__)
+    print(report.mean(numeric_only=True))
+
+
+quantifiers = [
+    PACC(),
+    DMy(),
+    EMQ(),
+    KDEyML(),
+]
+classifier = LogisticRegression()
+
+dataset_name = qp.datasets.UCI_MULTICLASS_DATASETS[0]
+data = qp.datasets.fetch_UCIMulticlassDataset(dataset_name)
+train, test = data.train_test
+
+# the ensemble is evaluated first, then every member on its own
+scmq = SCMQ(classifier, quantifiers)
+train_and_test_model(scmq, train, test)
+for quantifier in quantifiers:
+    train_and_test_model(quantifier, train, test)
\ No newline at end of file
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 0838b82..4f7204d 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -591,7 +591,6 @@ class PACC(AggregativeSoftQuantifier):
         if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
 
-
     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """
         Estimates the misclassification rates
diff --git a/quapy/method/meta.py b/quapy/method/meta.py
index 3a98dd0..0fc8916 100644
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -1,6 +1,6 @@
 import itertools
 from copy import deepcopy
-from typing import Union
+from typing import Union, List
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import f1_score, make_scorer, accuracy_score
@@ -12,7 +12,7 @@ from quapy import functional as F
 from quapy.data import LabelledCollection
 from quapy.model_selection import GridSearchQ
 from quapy.method.base import BaseQuantifier, BinaryQuantifier
-from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier
+from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier, AggregativeSoftQuantifier
 
 try:
     from . import _neural
@@ -691,3 +691,66 @@ def EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
     """
 
     return ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)
+
+
+class SCMQ(AggregativeSoftQuantifier):
+    """
+    Ensemble that shares one classifier among several aggregative soft quantifiers
+    and merges their prevalence estimates into a single one.
+
+    :param classifier: classifier shared by all the member quantifiers
+    :param quantifiers: list of :class:`AggregativeSoftQuantifier` instances to combine;
+        they are deep-copied, so the instances passed by the caller are never modified
+    :param merge_fun: how to merge the members' estimates, one of `MERGE_FUNCTIONS`
+    :param val_split: stored as given; not used directly by this class
+    """
+
+    MERGE_FUNCTIONS = ['median', 'mean']
+
+    def __init__(self, classifier, quantifiers: List[AggregativeSoftQuantifier], merge_fun='median', val_split=5):
+        assert merge_fun in self.MERGE_FUNCTIONS, f'unknown {merge_fun=}, valid ones are {self.MERGE_FUNCTIONS}'
+        self.classifier = classifier
+        # private copies: aggregation_fit rebinds each member's classifier
+        self.quantifiers = [deepcopy(quantifier) for quantifier in quantifiers]
+        self.merge_fun = merge_fun
+        self.val_split = val_split
+
+    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
+        """
+        Fits the aggregation function of every member quantifier on the same predictions.
+
+        :param classif_predictions: forwarded unchanged to each member's `aggregation_fit`
+        :param data: forwarded unchanged to each member's `aggregation_fit`
+        :return: self
+        """
+        for quantifier in self.quantifiers:
+            # members never fit a classifier here; they are pointed to the shared one
+            quantifier.classifier = self.classifier
+            quantifier.aggregation_fit(classif_predictions, data)
+        return self
+
+    def aggregate(self, classif_predictions: np.ndarray):
+        """
+        Merges the prevalence estimates that the member quantifiers produce.
+
+        :param classif_predictions: forwarded unchanged to each member's `aggregate`
+        :return: the merged prevalence vector, as computed by :meth:`merge`
+        """
+        prev_predictions = [quantifier.aggregate(classif_predictions) for quantifier in self.quantifiers]
+        return self.merge(prev_predictions)
+
+    def merge(self, prev_predictions):
+        """
+        Combines one prevalence estimate per member into a single estimate.
+
+        :param prev_predictions: sequence of prevalence vectors, stacked along axis 0
+        :return: element-wise median across members (l1-normalized) or element-wise mean
+        :raises NotImplementedError: if `self.merge_fun` is not a supported function
+        """
+        prev_predictions = np.asarray(prev_predictions)
+        if self.merge_fun == 'median':
+            # element-wise medians need not sum to 1, hence the l1 normalization
+            return F.normalize_prevalence(np.median(prev_predictions, axis=0), method='l1')
+        if self.merge_fun == 'mean':
+            return np.mean(prev_predictions, axis=0)
+        raise NotImplementedError(f'merge function {self.merge_fun} not implemented!')