"""
This file implements some of the methods presented in the FAccT'22 paper by
Ghazimatin, Kleindessner, Russell, Abedjan, and Golebiowski,
Measuring Fairness of Rankings under Noisy Sensitive Information.

In particular, it implements two variants of a method relying on M3=rND:
one in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|A) (called "b")
and another in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|Â) (called "d")
"""

import numpy as np
from abc import ABC, abstractmethod
from sklearn.metrics import confusion_matrix

from quapy.method.aggregative import CC


class AbstractM3rND(ABC):
    """
    Base class for the M3=rND correction methods. Wraps a classifier in a
    Classify & Count (CC) quantifier and exposes the machinery shared by the
    two graphical-model variants ("b" and "d"), which differ only in how
    :meth:`fair_measure_correction` adjusts a proxy-based rND estimate.

    :param classifier: a (trained or trainable) classifier predicting the
        (proxy) sensitive attribute; it is wrapped in a CC quantifier
    """

    def __init__(self, classifier):
        self.quantifier = CC(classifier)

    def proxy_labels(self, instances):
        """Return the proxy (classifier-predicted) sensitive attribute for each instance."""
        return self.quantifier.classify(instances)

    def quantify(self, instances):
        """Return the class-prevalence estimate produced by the underlying CC quantifier."""
        return self.quantifier.quantify(instances)

    @abstractmethod
    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        """
        Correct an rND estimate computed from proxy labels.

        :param rND_estim: the rND value estimated on the proxy sensitive attribute
        :param conf_matrix: matrix with entries Cij=P(Y=i,Ŷ=j), as returned by
            :meth:`get_confusion_matrix`
        :return: the corrected rND estimate (a float)
        """
        ...

    def get_confusion_matrix(self, X, y, additive_smoothing=0.5):
        """
        Some confusion matrices may contain 0 values for certain classes, and this causes
        instabilities in the correction. If requested, applies additive smoothing. Default
        is adding half a count.

        :param X: array-like with the covariates
        :param y: array-like with the true labels
        :param additive_smoothing: float, default 0.5
        :return: the confusion matrix C with entries Cij=P(Y=i,Ŷ=j)
        """
        proxy_labels = self.proxy_labels(X)
        true_labels = y
        labels = self.quantifier.classes_
        # sklearn's confusion_matrix returns raw COUNTS; cast to float so both
        # the smoothing and the normalization below are well-defined
        conf_matrix = confusion_matrix(true_labels, proxy_labels, labels=labels).astype(float)
        if additive_smoothing > 0:
            conf_matrix += additive_smoothing
        # Normalize by the grand total so entries are joint probabilities
        # Cij=P(Y=i,Ŷ=j), as this method's contract states. This is required by
        # M3rND_ModelD, which reads P(A=1) off the row marginals (with raw
        # counts, beta would exceed 1 and the correction would be meaningless);
        # M3rND_ModelB is unaffected since its row-normalization is
        # scale-invariant.
        conf_matrix /= conf_matrix.sum()
        return conf_matrix


class M3rND_ModelB(AbstractM3rND):
    """
    Variant "b": assumes the graphical model P(Â,A,S) = P(Â|A)*P(S|A).
    """

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        """
        Correct the rND estimate under model "b": scale by 1/(1 - p - q), where
        p and q are the two off-diagonal error rates of the proxy classifier.
        If (1 - p - q) == 0 the correction is undefined and the estimate is
        returned unmodified.

        :param rND_estim: the rND value estimated on the proxy sensitive attribute
        :param conf_matrix: matrix with entries Cij=P(Y=i,Ŷ=j)
        :return: the corrected rND estimate
        """
        # conf_matrix contains values Cij=P(Y=i,Ŷ=j)
        # truecond_matrix contains values Cij=P(Ŷ=j|Y=i) (truecond stands for "conditioned on true labels")
        truecond_matrix = conf_matrix / conf_matrix.sum(axis=1, keepdims=True)
        p = truecond_matrix[0, 1]  # P(hat{A}=1|A=0)
        q = truecond_matrix[1, 0]  # P(hat{A}=0|A=1)
        den = (1 - p - q)
        if den != 0:
            corr = 1./den
            rND_estim = rND_estim * corr
        return rND_estim


class M3rND_ModelD(AbstractM3rND):
    """
    Variant "d": assumes the graphical model P(Â,A,S) = P(Â|A)*P(S|Â).
    """

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        """
        Correct the rND estimate under model "d", combining the error rates
        p, q with the true prevalence beta=P(A=1). If either of the predicted
        marginals x=P(Â=1) or y=P(Â=0) is zero the correction is undefined and
        the estimate is returned unmodified.

        :param rND_estim: the rND value estimated on the proxy sensitive attribute
        :param conf_matrix: matrix with entries Cij=P(Y=i,Ŷ=j); it MUST be
            normalized to joint probabilities (as :meth:`get_confusion_matrix`
            guarantees), since the row marginals are read as P(A=i)
        :return: the corrected rND estimate
        """
        # conf_matrix contains values Cij=P(Y=i,Ŷ=j)
        # truecond_matrix contains values Cij=P(Ŷ=j|Y=i) (truecond stands for "conditioned on true labels")
        truecond_matrix = conf_matrix / conf_matrix.sum(axis=1, keepdims=True)
        prev_A = conf_matrix.sum(axis=1)  # row marginals: P(A=i)
        beta = prev_A[1]  # P(A=1)
        p = truecond_matrix[0, 1]  # P(hat{A}=1|A=0)
        q = truecond_matrix[1, 0]  # P(hat{A}=0|A=1)
        x = (1 - q) * beta + p * (1 - beta)   # P(hat{A}=1)
        y = q * beta + (1 - p) * (1 - beta)   # P(hat{A}=0)
        if x != 0 and y != 0:
            corr = ((((1 - q) * beta) / x) - (q * beta / y))
            rND_estim = rND_estim * corr
        return rND_estim