forked from moreo/QuaPy

many aggregative methods added

parent 78988c87f9
commit a882424eeb
TODO.txt
@@ -1,2 +1,3 @@
 Documentation with sphinx
 The parallel training in svmperf seems not to work
+Add "prepare svmperf for quantification" script
@@ -0,0 +1,28 @@
#!/bin/bash
set -x

URL=http://download.joachims.org/svm_perf/current/svm_perf.tar.gz
FILE=./svm_perf.tar.gz
wget $URL -O $FILE
mkdir ./svm_perf
tar xvzf $FILE -C ./svm_perf
rm $FILE

# to create the patch [for my own use]
#diff -Naur svm_perf svm_perf_quantification > svm-perf-quantification-ext.patch

# to create the modified version
#cp svm_perf svm_perf_quantification -r [THIS IS NOT NEEDED]
patch -s -p0 < svm-perf-quantification-ext.patch
mv svm_perf svm_perf_quantification
cd svm_perf_quantification
make
@@ -0,0 +1,6 @@
from .dataset import *
from . import functional
from . import method
from . import error
@@ -0,0 +1,47 @@
from sklearn.metrics import f1_score
from settings import SAMPLE_SIZE


def f1e(y_true, y_pred):
    return 1. - f1_score(y_true, y_pred, average='macro')


def acce(y_true, y_pred):
    acc = (y_true == y_pred).mean()
    return 1. - acc


def mae(prevs, prevs_hat):
    return ae(prevs, prevs_hat).mean()


def ae(p, p_hat):
    assert p.shape == p_hat.shape, 'wrong shape'
    return abs(p_hat - p).mean(axis=-1)


def mrae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
    return rae(p, p_hat, eps).mean()


def rae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
    p = smooth(p, eps)
    p_hat = smooth(p_hat, eps)
    return (abs(p - p_hat) / p).mean(axis=-1)


def smooth(p, eps):
    n_classes = p.shape[-1]
    return (p + eps) / (eps * n_classes + 1)


CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mrae}

f1_error = f1e
acc_error = acce
mean_absolute_error = mae
absolute_error = ae
mean_relative_absolute_error = mrae
relative_absolute_error = rae
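
Note: the quantification errors above can be sanity-checked standalone. A minimal sketch (the prevalence vectors, and the assumed SAMPLE_SIZE of 100 and hence eps, are illustrative, not values from this repository):

    import numpy as np

    p = np.asarray([0.7, 0.3])        # true class prevalences (illustrative)
    p_hat = np.asarray([0.6, 0.4])    # estimated class prevalences (illustrative)
    eps = 1. / (2. * 100)             # eps = 1/(2*SAMPLE_SIZE), assuming SAMPLE_SIZE=100

    print(abs(p_hat - p).mean(axis=-1))                     # ae -> 0.1
    smooth = lambda q: (q + eps) / (eps * q.shape[-1] + 1)  # same smoothing as in rae
    print((abs(smooth(p) - smooth(p_hat)) / smooth(p)).mean(axis=-1))  # rae
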
@@ -0,0 +1,49 @@
from collections import defaultdict
import numpy as np
import itertools


def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
    s = np.linspace(0., 1., n_prevalences, endpoint=True)
    s = [s] * (dimensions - 1)
    prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1]
    if return_constrained_dim:
        prevs = [p + (1 - sum(p),) for p in prevs]
    prevs = np.asarray(prevs).reshape(len(prevs), -1)
    if repeat > 1:
        prevs = np.repeat(prevs, repeat, axis=0)
    return prevs


def prevalence_from_labels(labels, n_classes):
    unique, counts = np.unique(labels, return_counts=True)
    by_class = defaultdict(lambda: 0, dict(zip(unique, counts)))
    prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=float)
    prevalences /= prevalences.sum()
    return prevalences


def prevalence_from_probabilities(posteriors, binarize: bool = False):
    if binarize:
        predictions = np.argmax(posteriors, axis=-1)
        return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
    else:
        prevalences = posteriors.mean(axis=0)
        prevalences /= prevalences.sum()
        return prevalences


def strprev(prevalences, prec=3):
    return '[' + ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'


def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
    den = tpr - fpr
    if den == 0:
        den += 1e-8
    adjusted = (prevalence_estim - fpr) / den
    if clip:
        adjusted = np.clip(adjusted, 0., 1.)
    return adjusted
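
Note: adjusted_quantification is the classic binary adjustment p = (p_cc - fpr) / (tpr - fpr). A minimal sketch with illustrative rates (not taken from this repository):

    # with tpr=0.8 and fpr=0.2, a raw positive rate of 0.5 is left unchanged,
    # while 0.3 is corrected to (0.3 - 0.2) / 0.6 = 0.1666...
    for p_cc in (0.5, 0.3):
        print((p_cc - 0.2) / (0.8 - 0.2))
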
@@ -0,0 +1,30 @@
from . import aggregative as agg
from . import non_aggregative as nagg


AGGREGATIVE_METHODS = {
    agg.ClassifyAndCount,
    agg.AdjustedClassifyAndCount,
    agg.ProbabilisticClassifyAndCount,
    agg.ProbabilisticAdjustedClassifyAndCount,
    agg.ExplicitLossMinimisation,
    agg.ExpectationMaximizationQuantifier,
}

NON_AGGREGATIVE_METHODS = {
    nagg.MaximumLikelihoodPrevalenceEstimation
}

QUANTIFICATION_METHODS = AGGREGATIVE_METHODS | NON_AGGREGATIVE_METHODS


# common aliases
CC = agg.ClassifyAndCount
ACC = agg.AdjustedClassifyAndCount
PCC = agg.ProbabilisticClassifyAndCount
PACC = agg.ProbabilisticAdjustedClassifyAndCount
ELM = agg.ExplicitLossMinimisation
EMQ = agg.ExpectationMaximizationQuantifier
MLPE = nagg.MaximumLikelihoodPrevalenceEstimation
@@ -0,0 +1,351 @@
import numpy as np
from .base import *
from ..error import mae
from .. import functional as F
from ..classification.svmperf import SVMperf
from ..dataset import LabelledCollection
from sklearn.metrics import confusion_matrix
from sklearn.calibration import CalibratedClassifierCV
from joblib import Parallel, delayed


# Abstract classes
# ------------------------------------

class AggregativeQuantifier(BaseQuantifier):
    """
    Abstract class for quantification methods that base their estimations on the aggregation of classification
    results. Aggregative Quantifiers thus implement a _classify_ method and maintain a _learner_ attribute.
    """

    @abstractmethod
    def fit(self, data: LabelledCollection, fit_learner=True, *args): ...

    def classify(self, documents):
        return self.learner.predict(documents)

    def get_params(self, deep=True):
        return self.learner.get_params()

    def set_params(self, **parameters):
        self.learner.set_params(**parameters)

    @property
    def n_classes(self):
        return len(self.classes)

    @property
    def classes(self):
        return self.learner.classes_


class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
    """
    Abstract class for quantification methods that base their estimations on the aggregation of posterior
    probabilities as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend
    Aggregative Quantifiers by implementing a _soft_classify_ method returning values in [0,1] -- the posterior
    probabilities.
    """

    def soft_classify(self, data):
        return self.learner.predict_proba(data)

    def set_params(self, **parameters):
        if isinstance(self.learner, CalibratedClassifierCV):
            parameters = {'base_estimator__' + k: v for k, v in parameters.items()}
        self.learner.set_params(**parameters)


# Helper
# ------------------------------------
def training_helper(learner,
                    data: LabelledCollection,
                    fit_learner: bool = True,
                    ensure_probabilistic=False,
                    train_val_split=None):
    """
    Training procedure common to all Aggregative Quantifiers.
    :param learner: the learner to be fit
    :param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner.
    :param fit_learner: whether or not to fit the learner
    :param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the
    learner is not probabilistic, then a CalibratedClassifierCV instance of it is trained)
    :param train_val_split: if specified, indicates the proportion of training documents on which to fit the learner
    :return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split>0
    or None otherwise)
    """
    if fit_learner:
        if ensure_probabilistic:
            if not hasattr(learner, 'predict_proba'):
                print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. '
                      f'The learner will be calibrated.')
                learner = CalibratedClassifierCV(learner, cv=5)
        if train_val_split is not None:
            if not (0 < train_val_split < 1):
                raise ValueError(f'train/val split {train_val_split} out of range, must be in (0,1)')
            train, unused = data.split_stratified(train_prop=train_val_split)
        else:
            train, unused = data, None
        learner.fit(train.instances, train.labels)
    else:
        if ensure_probabilistic:
            if not hasattr(learner, 'predict_proba'):
                raise AssertionError('error: the learner cannot be calibrated since fit_learner is set to False')
        unused = data

    return learner, unused


# Methods
# ------------------------------------
class ClassifyAndCount(AggregativeQuantifier):
    """
    The most basic Quantification method. One that simply classifies all instances and counts how many have been
    attributed to each of the classes in order to compute class prevalence estimates.
    """

    def __init__(self, learner):
        self.learner = learner

    def fit(self, data: LabelledCollection, fit_learner=True, *args):
        """
        Trains the Classify & Count method unless _fit_learner_ is False, in which case it is assumed to be already fit.
        :param data: training data
        :param fit_learner: if False, the classifier is assumed to be fit
        :param args: unused
        :return: self
        """
        self.learner, _ = training_helper(self.learner, data, fit_learner)
        return self

    def quantify(self, documents, *args):
        classification = self.classify(documents)  # classify
        return F.prevalence_from_labels(classification, self.n_classes)  # & count
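
Note: a usage sketch of Classify & Count; the import paths and the positional LabelledCollection constructor follow the imports visible in this commit, while the synthetic data is an assumption for illustration:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    import quapy as qp

    X, y = make_classification(n_samples=1000, random_state=0)
    train = qp.LabelledCollection(X[:700], y[:700])    # (instances, labels)
    cc = qp.method.aggregative.ClassifyAndCount(LogisticRegression())
    cc.fit(train)
    print(cc.quantify(X[700:]))    # estimated class prevalences, summing to 1
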
class AdjustedClassifyAndCount(AggregativeQuantifier):

    def __init__(self, learner):
        self.learner = learner

    def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
        self.learner, validation = training_helper(self.learner, data, fit_learner, train_val_split=train_val_split)
        self.cc = ClassifyAndCount(self.learner)
        y_ = self.cc.classify(validation.instances)
        y = validation.labels
        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
        # document that belongs to yj ends up being classified as belonging to yi
        self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
        return self

    def quantify(self, documents, *args):
        prevs_estim = self.cc.quantify(documents)
        # solve the linear system Ax = B with A = Pte_cond_estim and B = prevs_estim
        A = self.Pte_cond_estim_
        B = prevs_estim
        try:
            adjusted_prevs = np.linalg.solve(A, B)
            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
            adjusted_prevs /= adjusted_prevs.sum()
        except np.linalg.LinAlgError:
            adjusted_prevs = prevs_estim  # no way to adjust them!
        return adjusted_prevs

    def classify(self, data):
        return self.cc.classify(data)
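
Note: in the binary case the linear system solved above reduces to the adjusted_quantification formula in functional.py. A minimal numeric sketch (the tpr/fpr values are illustrative):

    import numpy as np

    tpr, fpr = 0.8, 0.2
    A = np.array([[1 - fpr, 1 - tpr],   # P(pred=0|true=0), P(pred=0|true=1)
                  [fpr,     tpr]])      # P(pred=1|true=0), P(pred=1|true=1)
    p_cc = 0.5                          # raw Classify & Count estimate for class 1
    B = np.array([1 - p_cc, p_cc])
    print(np.linalg.solve(A, B))        # -> [0.5, 0.5]; class 1 equals (p_cc - fpr)/(tpr - fpr)
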
class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
    def __init__(self, learner):
        self.learner = learner

    def fit(self, data: LabelledCollection, fit_learner=True, *args):
        self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
        return self

    def quantify(self, documents, *args):
        posteriors = self.soft_classify(documents)  # classify
        prevalences = F.prevalence_from_probabilities(posteriors, binarize=False)  # & count
        return prevalences


class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):

    def __init__(self, learner):
        self.learner = learner

    def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
        self.learner, validation = training_helper(
            self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split
        )
        self.pcc = ProbabilisticClassifyAndCount(self.learner)
        y_ = self.pcc.classify(validation.instances)
        y = validation.labels
        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
        # document that belongs to yj ends up being classified as belonging to yi
        self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
        return self

    def quantify(self, documents, *args):
        prevs_estim = self.pcc.quantify(documents)
        A = self.Pte_cond_estim_
        B = prevs_estim
        try:
            adjusted_prevs = np.linalg.solve(A, B)
            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
            adjusted_prevs /= adjusted_prevs.sum()
        except np.linalg.LinAlgError:
            adjusted_prevs = prevs_estim  # no way to adjust them!
        return adjusted_prevs

    def classify(self, data):
        return self.pcc.classify(data)
class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):

    MAX_ITER = 1000
    EPSILON = 1e-4

    def __init__(self, learner, verbose=False):
        self.learner = learner
        self.verbose = verbose

    def fit(self, data: LabelledCollection, fit_learner=True, *args):
        self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
        self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes)
        return self

    def quantify(self, X, epsilon=EPSILON):
        tr_prev = self.train_prevalence
        posteriors = self.soft_classify(X)
        return self.EM(tr_prev, posteriors, self.verbose, epsilon)

    @classmethod
    def EM(cls, tr_prev, posterior_probabilities, verbose=False, epsilon=EPSILON):
        Px = posterior_probabilities
        Ptr = np.copy(tr_prev)
        qs = np.copy(Ptr)  # qs (the running estimate) is initialized as the training prevalence

        s, converged = 0, False
        qs_prev_ = None
        while not converged and s < ExpectationMaximizationQuantifier.MAX_ITER:
            # E-step: ps is Ps(y=+1|xi)
            ps_unnormalized = (qs / Ptr) * Px
            ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1, 1)

            # M-step: qs_pos is Ps+1(y=+1)
            qs = ps.mean(axis=0)

            if qs_prev_ is not None and mae(qs, qs_prev_) < epsilon and s > 10:
                converged = True

            qs_prev_ = qs
            s += 1

        if verbose:
            print('-' * 80)

        if not converged:
            raise UserWarning('the method has reached the maximum number of iterations; it might have not converged')

        return qs
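
Note: the loop above is the Saerens-Latinne-Decaestecker EM adjustment: the E-step reweights each posterior by qs/Ptr and renormalizes per instance, and the M-step averages the reweighted posteriors. A standalone sketch of the same update on toy posteriors (all values illustrative):

    import numpy as np

    Ptr = np.array([0.5, 0.5])                            # training prevalence
    Px = np.array([[0.9, 0.1], [0.8, 0.2], [0.3, 0.7]])   # toy posteriors, 3 instances
    qs = Ptr.copy()
    for _ in range(100):
        ps = (qs / Ptr) * Px                  # E-step: reweight posteriors
        ps /= ps.sum(axis=1, keepdims=True)   # renormalize per instance
        qs = ps.mean(axis=0)                  # M-step: updated prevalence estimate
    print(qs)                                 # the test-prevalence estimate
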
# todo: from here
def train_task(c, learners, data):
    learners[c].fit(data.instances, data.labels == c)


def binary_quant_task(c, learners, X):
    predictions_ci = learners[c].predict(X)
    return predictions_ci.mean()  # since the predictions array is binary


class OneVsAllELM(AggregativeQuantifier):

    def __init__(self, svmperf_base, loss, n_jobs=-1, **kwargs):
        self.svmperf_base = svmperf_base
        self.loss = loss
        self.n_jobs = n_jobs
        self.kwargs = kwargs

    def fit(self, data: LabelledCollection, fit_learner=True, *args):
        assert fit_learner, 'the method requires that fit_learner=True'

        self.learners = {c: SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) for c in data.classes_}
        Parallel(n_jobs=self.n_jobs, backend='threading')(
            delayed(train_task)(c, self.learners, data) for c in self.learners.keys()
        )
        return self

    def quantify(self, X, y=None):
        prevalences = np.asarray(
            Parallel(n_jobs=self.n_jobs, backend='threading')(
                delayed(binary_quant_task)(c, self.learners, X) for c in self.learners.keys()
            )
        )
        prevalences /= prevalences.sum()
        return prevalences

    @property
    def classes(self):
        return sorted(self.learners.keys())

    def preclassify_collection(self, data: LabelledCollection):
        classifications = []
        for class_ in data.classes_:
            classifications.append(self.learners[class_].predict(data.instances))
        classifications = np.vstack(classifications).T
        precomputed = LabelledCollection(classifications, data.labels)
        return precomputed

    def set_params(self, **parameters):
        self.kwargs = parameters

    def get_params(self, deep=True):
        return self.kwargs


class ExplicitLossMinimisation(AggregativeQuantifier):

    def __init__(self, svmperf_base, loss, **kwargs):
        self.learner = SVMperf(svmperf_base, loss=loss, **kwargs)

    def fit(self, data: LabelledCollection, fit_learner=True, *args):
        assert fit_learner, 'the method requires that fit_learner=True'
        self.learner.fit(data.instances, data.labels)
        return self

    def quantify(self, X, y=None):
        predictions = self.learner.predict(X)
        return F.prevalence_from_labels(predictions, self.learner.n_classes_)

    def classify(self, X, y=None):
        return self.learner.predict(X)


class SVMQ(ExplicitLossMinimisation):
    def __init__(self, svmperf_base, **kwargs):
        super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)


class SVMKLD(ExplicitLossMinimisation):
    def __init__(self, svmperf_base, **kwargs):
        super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)


class SVMNKLD(ExplicitLossMinimisation):
    def __init__(self, svmperf_base, **kwargs):
        super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)


class SVMAE(ExplicitLossMinimisation):
    def __init__(self, svmperf_base, **kwargs):
        super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)


class SVMRAE(ExplicitLossMinimisation):
    def __init__(self, svmperf_base, **kwargs):
        super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
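
Note: a sketch of how the ELM wrappers are meant to be used once svm_perf_quantification has been built by the preparation script at the top of this commit; the relative path is an assumption:

    from quapy.method.aggregative import SVMQ

    svmq = SVMQ(svmperf_base='./svm_perf_quantification')
    # svmq.fit(train)                          # train: a LabelledCollection
    # prevalences = svmq.quantify(test_instances)
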
@@ -0,0 +1,21 @@
from abc import ABCMeta, abstractmethod
import quapy as qp


# Base Quantifier abstract class
# ------------------------------------
class BaseQuantifier(metaclass=ABCMeta):

    @abstractmethod
    def fit(self, data: qp.LabelledCollection, *args): ...

    @abstractmethod
    def quantify(self, documents, *args): ...

    @abstractmethod
    def set_params(self, **parameters): ...

    @abstractmethod
    def get_params(self, deep=True): ...
@@ -0,0 +1,21 @@
from quapy import LabelledCollection
from .base import BaseQuantifier


class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):

    def __init__(self, **kwargs):
        pass

    def fit(self, data: LabelledCollection, *args):
        self.estimated_prevalence = data.prevalence()
        return self

    def quantify(self, documents, *args):
        return self.estimated_prevalence

    def get_params(self, deep=True):
        pass

    def set_params(self, **parameters):
        pass
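
Note: MLPE ignores the test instances altogether and always returns the training prevalence. A minimal sketch (toy data; the positional LabelledCollection constructor is as used elsewhere in this commit):

    import numpy as np
    from quapy import LabelledCollection
    from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation

    train = LabelledCollection(np.zeros((4, 2)), np.array([0, 0, 0, 1]))
    mlpe = MaximumLikelihoodPrevalenceEstimation()
    mlpe.fit(train)
    print(mlpe.quantify(None))   # the training prevalence, e.g. [0.75 0.25]
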
@@ -0,0 +1,540 @@
diff -ruN svm_perf/svm_struct/svm_struct_main.c svm_perf_quantification/svm_struct/svm_struct_main.c
--- svm_perf/svm_struct/svm_struct_main.c	2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct/svm_struct_main.c	2020-10-28 12:23:53.000000000 +0100
@@ -128,7 +128,8 @@
  struct_parm->newconstretrain=100;
  struct_parm->ccache_size=5;
  struct_parm->batch_size=100;
-
+  struct_parm->loss_parm=1.0;
+  struct_parm->beta=1.0; // AIC-QBETA
  strcpy (modelfile, "svm_struct_model");
  strcpy (learn_parm->predfile, "trans_predictions");
  strcpy (learn_parm->alphafile, "");
@@ -170,6 +171,7 @@
      case 'p': i++; struct_parm->slack_norm=atol(argv[i]); break;
      case 'e': i++; struct_parm->epsilon=atof(argv[i]); break;
      case 'k': i++; struct_parm->newconstretrain=atol(argv[i]); break;
+     case 'j': i++; struct_parm->beta=atof(argv[i]); break; // AIC-QBETA
      case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
      case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
      case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
@@ -189,6 +191,7 @@
      case '-': strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break;
      case 'v': i++; (*struct_verbosity)=atol(argv[i]); break;
      case 'y': i++; (*verbosity)=atol(argv[i]); break;
+     case '!': i++; struct_parm->loss_parm=atof(argv[i]); break;
      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
               print_help();
               exit(0);
@@ -396,6 +399,9 @@
  printf(" (in the same order as in the training set)\n");
  printf("Application-Specific Options:\n");
  print_struct_help();
+ printf("*************************************************\n"); // AIC-QBETA
+ printf(" -j float -> parameter beta for qbeta-based loss functions (default: 1.0)\n");
+ printf("*************************************************\n");
  wait_any_key();

  printf("\nMore details in:\n");
diff -ruN svm_perf/svm_struct_api.c svm_perf_quantification/svm_struct_api.c
--- svm_perf/svm_struct_api.c	2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct_api.c	2020-10-28 12:23:53.000000000 +0100
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <math.h>
#include "svm_struct_api.h"
#include "svm_light/svm_common.h"
#include "svm_struct/svm_struct_common.h"
@@ -27,7 +28,9 @@

#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define MIN(x,y) ((x) > (y) ? (y) : (x))
+#define ABS(x) ((x) < (0) ? (-(x)) : (x))
#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+#define PI (3.141592653589793)

int compareup(const void *a, const void *b)
{
@@ -72,6 +75,16 @@
double rocarea(LABEL y, LABEL ybar);
double prbep(LABEL y, LABEL ybar);
double avgprec(LABEL y, LABEL ybar);
+/* AIC-QBETA */
+double gm(int a, int b, int c, int d);
+double nae(int a, int b, int c, int d);
+double ae(int a, int b, int c, int d);
+double rae(int a, int b, int c, int d);
+double Qbeta(int a, int b, int c, int d, double beta);
+double Qbeta_acc(int a, int b, int c, int d, double beta);
+double Qbeta_f1(int a, int b, int c, int d, double beta);
+double Qbeta_gm(int a, int b, int c, int d, double beta);
+/* AIC-QBETA */

double zeroone_loss(int a, int b, int c, int d);
double fone_loss(int a, int b, int c, int d);
@@ -82,6 +95,23 @@
double swappedpairs_loss(LABEL y, LABEL ybar);
double avgprec_loss(LABEL y, LABEL ybar);

+double kldiv(int a, int b, int c, int d); // KLD
+double kldiv_loss(int a, int b, int c, int d); // KLD
+double nkldiv_loss(int a, int b, int c, int d); // KLD
+
+double milli_loss(int a, int b, int c, int d); //MILL
+
+/* AIC-QBETA */
+double gm_loss(int a, int b, int c, int d);
+double nae_loss(int a, int b, int c, int d);
+double ae_loss(int a, int b, int c, int d);
+double rae_loss(int a, int b, int c, int d);
+double Qbeta_loss(int a, int b, int c, int d, double beta);
+double Qbeta_acc_loss(int a, int b, int c, int d, double beta);
+double Qbeta_f1_loss(int a, int b, int c, int d, double beta);
+double Qbeta_gm_loss(int a, int b, int c, int d, double beta);
+/* AIC-QBETA */
+
void svm_struct_learn_api_init(int argc, char* argv[])
{
  /* Called in learning part before anything else is done to allow
@@ -181,10 +211,22 @@
    /* change label value for better scaling using thresholdmetrics */
    if((sparm->loss_function == ZEROONE)
       || (sparm->loss_function == FONE)
+      || (sparm->loss_function == GM) // AIC-QBETA
+      || (sparm->loss_function == NAE) // AIC-QBETA
+      || (sparm->loss_function == AE) // AIC-QBETA
+      || (sparm->loss_function == RAE) // AIC-QBETA
+      || (sparm->loss_function == QBETA) // AIC-QBETA
+      || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+      || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+      || (sparm->loss_function == QBETA_GM) // AIC-QBETA
       || (sparm->loss_function == ERRORRATE)
       || (sparm->loss_function == PRBEP)
       || (sparm->loss_function == PREC_K)
-      || (sparm->loss_function == REC_K)) {
+      || (sparm->loss_function == REC_K)
+      || (sparm->loss_function == KLD)
+      || (sparm->loss_function == NKLD)
+      || (sparm->loss_function == MILLI)
+      ) {
      for(i=0;i<sample.examples[0].x.totdoc;i++) {
        if(sample.examples[0].y.class[i]>0)
          sample.examples[0].y.class[i]=0.5*100.0/(numn+nump);
@@ -520,10 +562,22 @@
  LABEL ybar;
  if((sparm->loss_function == ZEROONE)
     || (sparm->loss_function == FONE)
+    || (sparm->loss_function == GM) // AIC-QBETA
+    || (sparm->loss_function == NAE) // AIC-QBETA
+    || (sparm->loss_function == AE) // AIC-QBETA
+    || (sparm->loss_function == RAE) // AIC-QBETA
+    || (sparm->loss_function == QBETA) // AIC-QBETA
+    || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+    || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+    || (sparm->loss_function == QBETA_GM) // AIC-QBETA
     || (sparm->loss_function == ERRORRATE)
     || (sparm->loss_function == PRBEP)
     || (sparm->loss_function == PREC_K)
-    || (sparm->loss_function == REC_K)) {
+    || (sparm->loss_function == REC_K)
+    || (sparm->loss_function == KLD)
+    || (sparm->loss_function == NKLD)
+    || (sparm->loss_function == MILLI)
+    ) {
    ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm,
                                                       sparm->loss_type);
  }
@@ -562,9 +616,21 @@
                                                       sparm->loss_type); */
  else if((sparm->loss_function == ZEROONE)
          || (sparm->loss_function == FONE)
+         || (sparm->loss_function == GM) // AIC-QBETA
+         || (sparm->loss_function == NAE) // AIC-QBETA
+         || (sparm->loss_function == AE) // AIC-QBETA
+         || (sparm->loss_function == RAE) // AIC-QBETA
+         || (sparm->loss_function == QBETA) // AIC-QBETA
+         || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+         || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+         || (sparm->loss_function == QBETA_GM) // AIC-QBETA
          || (sparm->loss_function == PRBEP)
          || (sparm->loss_function == PREC_K)
-         || (sparm->loss_function == REC_K))
+         || (sparm->loss_function == REC_K)
+         || (sparm->loss_function == KLD)
+         || (sparm->loss_function == NKLD)
+         || (sparm->loss_function == MILLI)
+         )
    ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm,
                                                       sparm->loss_type);
  else if((sparm->loss_function == SWAPPEDPAIRS))
@@ -741,7 +807,23 @@
      if(sparm->loss_function == ZEROONE)
        loss=zeroone_loss(a,numn-d,nump-a,d);
      else if(sparm->loss_function == FONE)
-       loss=fone_loss(a,numn-d,nump-a,d);
+       loss=fone_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == GM) // AIC-QBETA
+       loss=gm_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == NAE) // AIC-QBETA
+       loss=nae_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == AE) // AIC-QBETA
+       loss=ae_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == RAE) // AIC-QBETA
+       loss=rae_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == QBETA) // AIC-QBETA
+       loss=Qbeta_loss(a,numn-d,nump-a,d,sparm->beta);
+     else if(sparm->loss_function == QBETA_ACC) // AIC-QBETA
+       loss=Qbeta_acc_loss(a,numn-d,nump-a,d,sparm->beta);
+     else if(sparm->loss_function == QBETA_F1) // AIC-QBETA
+       loss=Qbeta_f1_loss(a,numn-d,nump-a,d,sparm->beta);
+     else if(sparm->loss_function == QBETA_GM) // AIC-QBETA
+       loss=Qbeta_gm_loss(a,numn-d,nump-a,d,sparm->beta);
      else if(sparm->loss_function == ERRORRATE)
        loss=errorrate_loss(a,numn-d,nump-a,d);
      else if((sparm->loss_function == PRBEP) && (a+numn-d == nump))
@@ -750,6 +832,12 @@
        loss=prec_k_loss(a,numn-d,nump-a,d);
      else if((sparm->loss_function == REC_K) && (a+numn-d <= prec_rec_k))
        loss=rec_k_loss(a,numn-d,nump-a,d);
+     else if(sparm->loss_function == KLD) //KLD
+       loss=kldiv_loss(a,numn-d,nump-a,d); //KLD
+     else if(sparm->loss_function == NKLD) //KLD
+       loss=nkldiv_loss(a,numn-d,nump-a,d); //KLD
+     else if(sparm->loss_function == MILLI) //MILLI
+       loss=milli_loss(a,numn-d,nump-a,d); //MILLI
      else {
        loss=0;
      }
@@ -1213,6 +1301,7 @@
    }
    /* printf("%f %f\n",y.class[i],ybar.class[i]); */
  }
+ //printf("********** loss %d (a,b,c,d) (%d,%d,%d,%d) beta=%f\n",sparm->loss_function,a,b,c,d,sparm->beta);
  /* Return the loss according to the selected loss function. */
  if(sparm->loss_function == ZEROONE) { /* type 0 loss: 0/1 loss */
    /* return 0, if y==ybar. return 1 else */
@@ -1221,6 +1310,30 @@
  else if(sparm->loss_function == FONE) {
    loss=fone_loss(a,b,c,d);
  }
+ else if(sparm->loss_function == GM) { // AIC-QBETA
+   loss=gm_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == NAE) { // AIC-QBETA
+   loss=nae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == AE) { // AIC-QBETA
+   loss=ae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == RAE) { // AIC-QBETA
+   loss=rae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == QBETA) { // AIC-QBETA
+   loss=Qbeta_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_ACC) { // AIC-QBETA
+   loss=Qbeta_acc_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_F1) { // AIC-QBETA
+   loss=Qbeta_f1_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_GM) { // AIC-QBETA
+   loss=Qbeta_gm_loss(a,b,c,d,sparm->beta);
+ }
  else if(sparm->loss_function == ERRORRATE) {
    loss=errorrate_loss(a,b,c,d);
  }
@@ -1242,6 +1355,15 @@
  else if(sparm->loss_function == AVGPREC) {
    loss=avgprec_loss(y,ybar);
  }
+ else if(sparm->loss_function == KLD) { //KLD
+   loss=kldiv_loss(a,b,c,d); //KLD
+ } //KLD
+ else if(sparm->loss_function == NKLD) { //KLD
+   loss=nkldiv_loss(a,b,c,d); //KLD
+ } //KLD
+ else if(sparm->loss_function == MILLI) { //MILLI
+   loss=milli_loss(a,b,c,d); //MILLI
+ } //MILLI
  else {
    /* Put your code for different loss functions here. But then
       find_most_violated_constraint_???(x, y, sm) has to return the
@@ -1320,6 +1442,16 @@
    printf("PRBEP : %5.2f\n",teststats->prbep);
    printf("ROCArea : %5.2f\n",teststats->rocarea);
    printf("AvgPrec : %5.2f\n",teststats->avgprec);
+   printf("Qb : %5.2f\n",teststats->Qbeta);
+   printf("Qb (Acc) : %5.2f\n",teststats->Qbeta_acc);
+   printf("Qb (F1) : %5.2f\n",teststats->Qbeta_f1);
+   printf("Qb (GM) : %5.2f\n",teststats->Qbeta_gm);
+   printf("NAE : %5.2f\n",teststats->nae);
+   printf("AE : %5.2f\n",teststats->ae);
+   printf("RAE : %5.2f\n",teststats->rae);
+   printf("GM : %5.2f\n",teststats->gm);
+   printf("KLD : %5.2f\n",teststats->kld);
+   printf("NKLD : %5.2f\n",teststats->nkld);
  }
  else {
    printf("NOTE: %ld test examples are unlabeled, so performance cannot be computed. The\n",teststats->test_data_unlabeled);
@@ -1352,6 +1484,29 @@
    teststats->recall=100.0-loss(ex.y,ypred,sparm);
    sparm->loss_function=FONE;
    teststats->fone=100.0-loss(ex.y,ypred,sparm);
+
+   sparm->loss_function=GM; // AIC-QBETA
+   teststats->gm=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=NAE; // AIC-QBETA
+   teststats->nae=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=AE; // AIC-QBETA
+   teststats->ae=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=RAE; // AIC-QBETA
+   teststats->rae=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=QBETA; // AIC-QBETA
+   teststats->Qbeta=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=QBETA_ACC; // AIC-QBETA
+   teststats->Qbeta_acc=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=QBETA_F1; // AIC-QBETA
+   teststats->Qbeta_f1=100.0-loss(ex.y,ypred,sparm);
+   sparm->loss_function=QBETA_GM; // AIC-QBETA
+   teststats->Qbeta_gm=100.0-loss(ex.y,ypred,sparm);
+
+   sparm->loss_function=KLD; // KLD
+   teststats->kld=100-loss(ex.y,ypred,sparm);
+   sparm->loss_function=NKLD; // KLD
+   teststats->nkld=100.0-loss(ex.y,ypred,sparm);
+
    teststats->prbep=prbep(ex.y,ypred);
    teststats->rocarea=rocarea(ex.y,ypred);
    teststats->avgprec=avgprec(ex.y,ypred);
@@ -1403,6 +1558,7 @@
  STRUCTMODEL sm;

  sm.svm_model=read_model(file);
+ sparm->beta = 1; // AIC-QBETA *****************************
  sparm->loss_function=ERRORRATE;
  sparm->bias=0;
  sparm->bias_featurenum=0;
@@ -1514,6 +1670,18 @@
  printf(" %2d Prec@k: 100 minus precision at k in percent.\n",PREC_K);
  printf(" %2d Rec@k: 100 minus recall at k in percent.\n",REC_K);
  printf(" %2d ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).\n\n",SWAPPEDPAIRS);
+ printf(" %2d Kullback-Leibler divergence.\n",KLD); //KLD
+ printf(" %2d Normalized Kullback-Leibler divergence.\n",NKLD); //KLD
+ printf(" %2d MILLI.\n",MILLI); //MILLI
+ printf(" %2d GM: geometric mean of tpr and tnr\n",GM); // AIC-QBETA
+ printf(" %2d NAE: normalized absolute error (Esuli & Sebastiani)\n",NAE); // AIC-QBETA
+ printf(" %2d AE: absolute error (Esuli & Sebastiani)\n",AE); // AIC-QBETA
+ printf(" %2d RAE: relative absolute error (Esuli & Sebastiani)\n",RAE); // AIC-QBETA
+ printf(" %2d Qbeta: 100 minus the Qbeta-score in percent (with recall)\n",QBETA); // AIC-QBETA
+ printf(" %2d Qbeta_acc: 100 minus the Qbeta-score in percent (with acc)\n",QBETA_ACC); // AIC-QBETA
+ printf(" %2d Qbeta_f1: 100 minus the Qbeta-score in percent (with F1)\n",QBETA_F1); // AIC-QBETA
+ printf(" %2d Qbeta_gm: 100 minus the Qbeta-score in percent (with GM)\n",QBETA_GM); // AIC-QBETA
+
  printf("NOTE: The '-c' parameters in SVM-light and SVM-perf are related as\n");
  printf(" c_light = c_perf*100/n for the 'Errorrate' loss function, where n is the\n");
  printf(" number of training examples.\n\n");
@@ -1785,7 +1953,65 @@
  free(predset);
  return(100.0*(apr/(double)(nump)));
}
-
+/* AIC-QBETA */
+double tnr(int a, int b, int c, int d)
+{
+  /* Returns tnr as fractional value. */
+  if((b+d) == 0) return(0.0);
+  return((double)d/(double)(b+d));
+}
+double gm(int a, int b, int c, int d)
+{
+  double tprate = rec(a,b,c,d);
+  double tnrate = tnr(a,b,c,d);
+  return sqrt( tprate * tnrate );
+}
+double nae(int a, int b, int c, int d)
+{
+  double maximo = (a+c > b+d? a+c: b+d);
+  return 1.0 - ( (double)abs(c-b) / maximo);
+  //return 1.0 - ( (double)abs(c-b) / (double)(a+b+c+d)); // ABSERR
+}
+double ae(int a, int b, int c, int d)
+{
+  return (double)abs(c-b) / (double)(a+b+c+d) ; // ABSERR
+}
+double rae(int a, int b, int c, int d)
+{
+  double absdif = (double)abs(c-b) ;
+  double smooth_rae_pos = absdif / ((double)(a+c+0.5)) ;
+  double smooth_rae_neg = absdif / ((double)(b+d+0.5)) ;
+  return 0.5*(smooth_rae_pos + smooth_rae_neg) ;
+}
+double Qbeta(int a, int b, int c, int d, double beta)
+{
+  if(a+c == 0) return(0.0);
+  double qperf=nae(a,b,c,d);
+  double cperf=rec(a,b,c,d);
+  return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_acc(int a, int b, int c, int d, double beta)
+{
+  if(a+c == 0) return(0.0);
+  double qperf=nae(a,b,c,d);
+  double cperf=1.0-errorrate(a,b,c,d);
+  return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_f1(int a, int b, int c, int d, double beta)
+{
+  if(a+c == 0) return(0.0);
+  double qperf=nae(a,b,c,d);
+  double cperf=fone(a,b,c,d);
+  return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_gm(int a, int b, int c, int d, double beta)
+{
+  if(a+c == 0) return(0.0);
+  double qperf=nae(a,b,c,d);
+  double cperf=gm(a,b,c,d);
+  return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+/* AIC-QBETA */
/*------- Loss functions based on performance measures --------*/

double zeroone_loss(int a, int b, int c, int d)
@@ -1846,4 +2072,70 @@
}


+//KLD
+double kldiv(int a, int b, int c, int d)
+{
+  double sum = a+b+c+d+1.0;
+  double pab = (a+b+0.5)/sum;
+  double pac = (a+c+0.5)/sum;
+  double pbd = (b+d+0.5)/sum;
+  double pcd = (c+d+0.5)/sum;
+
+  double kl = pac*log(pac/pab)+pbd*log(pbd/pcd);
+
+  return kl;
+}
+
+//KLD
+double kldiv_loss(int a, int b, int c, int d)
+{
+  return kldiv(a,b,c,d);
+}
+
+//KLD
+double nkldiv_loss(int a, int b, int c, int d)
+{
+  return 100.0-(100.0*2.0/(1.0+exp(kldiv(a,b,c,d))));
+}
+
+//MILLI
+double milli_loss(int a, int b, int c, int d)
+{
+  int sum = a+b+c+d;
+  return 100.0*(b+c)*ABS(b-c);
+}

+/* AIC-QBETA */
+double gm_loss(int a, int b, int c, int d)
+{
+  return 100.0 * (1.0-gm(a,b,c,d));
+}
+double nae_loss(int a, int b, int c, int d)
+{
+  return 100.0 * (1.0-nae(a,b,c,d));
+}
+double ae_loss(int a, int b, int c, int d)
+{
+  return 100.0 * ae(a,b,c,d);
+}
+double rae_loss(int a, int b, int c, int d)
+{
+  return 100.0 * rae(a,b,c,d);
+}
+double Qbeta_loss(int a, int b, int c, int d,double beta)
+{
+  return(100.0*(1.0-Qbeta(a,b,c,d,beta)));
+}
+double Qbeta_acc_loss(int a, int b, int c, int d,double beta)
+{
+  return(100.0*(1.0-Qbeta_acc(a,b,c,d,beta)));
+}
+double Qbeta_f1_loss(int a, int b, int c, int d,double beta)
+{
+  return(100.0*(1.0-Qbeta_f1(a,b,c,d,beta)));
+}
+double Qbeta_gm_loss(int a, int b, int c, int d,double beta)
+{
+  return(100.0*(1.0-Qbeta_gm(a,b,c,d,beta)));
+}
+/* AIC-QBETA */
diff -ruN svm_perf/svm_struct_api_types.h svm_perf_quantification/svm_struct_api_types.h
--- svm_perf/svm_struct_api_types.h	2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct_api_types.h	2020-10-28 12:23:53.000000000 +0100
@@ -28,14 +28,25 @@
# define INST_VERSION_DATE "15.07.2009"

/* Identifiers for loss functions */
-#define ZEROONE 0
-#define FONE 1
-#define ERRORRATE 2
-#define PRBEP 3
-#define PREC_K 4
-#define REC_K 5
-#define SWAPPEDPAIRS 10
-#define AVGPREC 11
+#define ZEROONE 0
+#define FONE 1
+#define ERRORRATE 2
+#define PRBEP 3
+#define PREC_K 4
+#define REC_K 5
+#define SWAPPEDPAIRS 10
+#define AVGPREC 11
+#define KLD 12 //KLD
+#define NKLD 13 //KLD
+#define MILLI 16 //MILLI
+#define GM 20 // AIC-QBETA
+#define NAE 21 // AIC-QBETA
+#define QBETA 22 // AIC-QBETA
+#define QBETA_ACC 23 // AIC-QBETA
+#define QBETA_F1 24 // AIC-QBETA
+#define QBETA_GM 25 // AIC-QBETA
+#define AE 26 // AIC-QBETA
+#define RAE 27 // AIC-QBETA

/* default precision for solving the optimization problem */
# define DEFAULT_EPS 0.1
@@ -169,6 +180,8 @@
                 svm_perf_classify. This uses more
                 memory, but is faster if the support
                 vectors in the model are dense. */
+  double loss_parm;
+  double beta; /* AIC-QBETA */
} STRUCT_LEARN_PARM;

typedef struct struct_test_stats {
@@ -183,6 +196,16 @@
  double prbep;
  double rocarea;
  double avgprec;
+  double kld; //KLD
+  double nkld; //KLD
+  double gm; // AIC-QBETA
+  double nae; // AIC-QBETA
+  double ae; // AIC-QBETA
+  double rae; // AIC-QBETA
+  double Qbeta; // AIC-QBETA
+  double Qbeta_acc; // AIC-QBETA
+  double Qbeta_f1; // AIC-QBETA
+  double Qbeta_gm; // AIC-QBETA
} STRUCT_TEST_STATS;

typedef struct struct_id_score {