from quapy.method.base import BaseQuantifier
import numpy as np
from distribution_matching.method.kdey import KDEBase

import quapy as qp
from quapy.data import LabelledCollection
import quapy.functional as F
from sklearn.preprocessing import StandardScaler


class KDExML(BaseQuantifier, KDEBase):
    """
    Maximum-likelihood quantifier based on kernel density estimation (KDE).

    Fits one KDE per class on the (optionally standardized) training
    covariates, then estimates test prevalences as the mixture weights that
    maximize the likelihood of the test sample under the class-conditional
    densities.

    :param bandwidth: bandwidth of the per-class kernel density estimators
        (validated by :meth:`KDEBase._check_bandwidth`)
    :param standardize: whether to z-score the covariates with a
        ``StandardScaler`` fitted on the training data before density
        estimation (the same transform is re-applied at quantification time)
    """

    def __init__(self, bandwidth=0.1, standardize=True):
        self._check_bandwidth(bandwidth)
        self.bandwidth = bandwidth
        self.standardize = standardize

    def fit(self, data: LabelledCollection):
        """
        Fits one KDE per class on the training covariates.

        :param data: the training sample, a :class:`LabelledCollection`
        :return: self (fluent interface)
        """
        X, y = data.Xy
        if self.standardize:
            # NOTE: self.scaler only exists when standardize=True;
            # quantify() guards its use with the same flag.
            self.scaler = StandardScaler()
            X = self.scaler.fit_transform(X)

        self.mix_densities = self.get_mixture_components(X, y, data.n_classes, self.bandwidth)
        return self

    def quantify(self, X):
        """
        Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood
        of the data (i.e., that minimizes the negative log-likelihood)

        :param X: instances in the sample
        :return: a vector of class prevalence estimates (one entry per class)
        """
        # small constant to avoid log(0) when a test point has ~zero density
        epsilon = 1e-10
        n_classes = len(self.mix_densities)
        if self.standardize:
            X = self.scaler.transform(X)
        # density of every test instance under each class-conditional KDE
        test_densities = [self.pdf(kde_i, X) for kde_i in self.mix_densities]

        def neg_loglikelihood(prev):
            # likelihood of each test point under the prev-weighted mixture
            test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
            test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
            return -np.sum(test_loglikelihood)

        # fix: the return must bind to the optimizer call; a bare `return`
        # followed by the call would yield None and leave the call unreachable
        return F.optim_minimize(neg_loglikelihood, n_classes)