24 changed files with 208 additions and 1027 deletions
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@ -1,23 +0,0 @@
 name: Pylint
 on: [push]
 jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pylint
    - name: Analysing the code with pylint
      run: |
        pylint $(git ls-files '*.py')
--- a/TODO.txt
+++ b/TODO.txt
@ -1,6 +1,3 @@
 ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now
    works with protocols it would need to know the validation set in order to pass something like
    "protocol: APP(val, etc.)"
 sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
 clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.)
 make truly parallel the GridSearchQ
--- a/docs/build/html/genindex.html
+++ b/docs/build/html/genindex.html
@ -224,6 +224,8 @@
      <li><a href="quapy.html#quapy.util.create_parent_dir">create_parent_dir() (in module quapy.util)</a>
 </li>
      <li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions">cross_generate_predictions() (in module quapy.method.aggregative)</a>
 </li>
      <li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions_depr">cross_generate_predictions_depr() (in module quapy.method.aggregative)</a>
 </li>
      <li><a href="quapy.html#quapy.model_selection.cross_val_predict">cross_val_predict() (in module quapy.model_selection)</a>
 </li>
--- a/docs/build/html/objects.inv
+++ b/docs/build/html/objects.inv
--- a/docs/build/html/quapy.classification.html
+++ b/docs/build/html/quapy.classification.html
@ -316,14 +316,11 @@ fitting <cite>TruncatedSVD</cite> and then <cite>LogisticRegression</cite> on th
 <dl class="py method">
 <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params">
-<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">deep</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition">¶</a></dt>
+<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition">¶</a></dt>
 <dd><p>Get hyper-parameters for this estimator.</p>
 <dl class="field-list simple">
-<dt class="field-odd">Parameters<span class="colon">:</span></dt>
+<dt class="field-odd">Returns<span class="colon">:</span></dt>
-<dd class="field-odd"><p><strong>deep</strong> – compatibility with sklearn</p>
+<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>a dictionary with parameter names mapped to their values</p>
 </dd>
 </dl>
 </dd></dl>
@ -527,7 +524,7 @@ dimensionality of the embedding</p>
 <dl class="py class">
 <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span 
class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cuda'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition">¶</a></dt>
+<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span 
class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cpu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
 <p>Trains a neural network for text classification.</p>
 <dl class="field-list simple">
--- a/docs/build/html/quapy.data.html
+++ b/docs/build/html/quapy.data.html
@ -447,8 +447,8 @@ index.</p>
 <span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
 prevalence values are not specified, then returns the index of a uniform sampling.
-For each class, the sampling is drawn with replacement if the requested prevalence is larger than
+For each class, the sampling is drawn without replacement if the requested prevalence is larger than
-the actual prevalence of the class, or without replacement otherwise.</p>
+the actual prevalence of the class, or with replacement otherwise.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
@ -534,7 +534,7 @@ values for each class)</p>
 <dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
 <span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
-with replacement if the requested size is greater than the number of instances, or without replacement
+without replacement if the requested size is greater than the number of instances, or with replacement
 otherwise.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -553,7 +553,7 @@ otherwise.</p>
 <dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
 <span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition">¶</a></dt>
 <dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
-with replacement if the requested size is greater than the number of instances, or without replacement
+without replacement if the requested size is greater than the number of instances, or with replacement
 otherwise.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
--- a/docs/build/html/quapy.html
+++ b/docs/build/html/quapy.html
@ -61,7 +61,6 @@
 </section>
 <section id="module-quapy.error">
 <span id="quapy-error"></span><h2>quapy.error<a class="headerlink" href="#module-quapy.error" title="Permalink to this heading">¶</a></h2>
 <p>Implementation of error measures used for quantification</p>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.absolute_error">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition">¶</a></dt>
@ -87,9 +86,8 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.acc_error">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acc_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acc_error" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as
+<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
-<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing
+<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
 for true positives, false positives, false negatives, and true negatives,
 respectively</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -107,9 +105,8 @@ respectively</p>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.acce">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acce" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as
+<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
-<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing
+<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
 for true positives, false positives, false negatives, and true negatives,
 respectively</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -149,12 +146,10 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.f1_error">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1_error" title="Permalink to this definition">¶</a></dt>
-<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e.,
+<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
-<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall,
+where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
-defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing
+with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
-for true positives, false positives, and false negatives, respectively.
+<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
 <cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
 and then averaged.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
@ -171,12 +166,10 @@ and then averaged.</p>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.f1e">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1e</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1e" title="Permalink to this definition">¶</a></dt>
-<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e.,
+<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
-<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall,
+where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
-defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing
+with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
-for true positives, false positives, and false negatives, respectively.
+<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
 <cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
 and then averaged.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
@ -193,8 +186,7 @@ and then averaged.</p>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.from_name">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">from_name</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">err_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.from_name" title="Permalink to this definition">¶</a></dt>
-<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite>
+<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite> will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
 will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><p><strong>err_name</strong> – string, the error name</p>
@ -207,13 +199,11 @@ will return function <a class="reference internal" href="#quapy.error.mae" title
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.kld">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="simple">
-<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
+<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
-is computed as
+<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>, where
-<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=
+<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
 \sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>,
 where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
 The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 </dd>
 </dl>
@ -222,10 +212,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
 <li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain
+<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
-zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -242,8 +231,7 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
 prevalence values</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -260,8 +248,7 @@ prevalence values</p></li>
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
 prevalence values</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -272,21 +259,17 @@ prevalence values</p></li>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.mean_relative_absolute_error">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across
+<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
-the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see
+The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
-<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
+<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-prevalence values</p></li>
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
-prevalence values</p></li>
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
 with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
 the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -298,20 +281,16 @@ the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set befo
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.mkld">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mkld" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the
+<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the sample pairs.
-sample pairs. The distributions are smoothed using the <cite>eps</cite> factor
+The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
-<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
+<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-prevalence values</p></li>
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
-prevalence values</p></li>
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
 If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
 (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -323,19 +302,16 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.mnkld">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mnkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mnkld" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>)
+<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>) across the sample pairs.
-across the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor
+The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
-prevalence values</p></li>
+<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
-<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
 (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -346,21 +322,17 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.mrae">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across
+<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
-the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see
+The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
-<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
+<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-prevalence values</p></li>
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
+<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
-prevalence values</p></li>
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
 with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
 the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -376,10 +348,8 @@ the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set befo
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
-<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the
+<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
-true prevalence values</p></li>
+<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
 <li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the
 predicted prevalence values</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -390,12 +360,10 @@ predicted prevalence values</p></li>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.nkld">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="simple">
-<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and
+<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
-<span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
+is computed as <span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>, where
 <span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>,
 where
 <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
 The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 </dd>
@ -405,10 +373,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
 <li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions
+<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
-contain zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -419,12 +386,10 @@ size. If <cite>eps=None</cite>, the sample size will be taken from the environme
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.rae">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="simple">
-<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
+<dt>Computes the relative absolute error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
-is computed as
+<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
 <span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
 \frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
 where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
 The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 </dd>
@ -434,10 +399,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
 <li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution
+<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
-contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -448,12 +412,10 @@ sample size. If <cite>eps=None</cite>, the sample size will be taken from the en
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.relative_absolute_error">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="simple">
-<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
+<dt>Computes the relative absolute error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
-is computed as
+<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
 <span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
 \frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
 where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
 The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
 </dd>
@ -463,10 +425,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
 <li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
-<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution
+<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
-contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the
+is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
-sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
+will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -477,11 +438,10 @@ sample size. If <cite>eps=None</cite>, the sample size will be taken from the en
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.error.se">
-<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition">¶</a></dt>
+<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition">¶</a></dt>
 <dd><dl class="simple">
 <dt>Computes the squared error between the two prevalence vectors.</dt><dd><p>Squared error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>  is computed as
-<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>,
+<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>, where
 where
 <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
 </dd>
 </dl>
@ -502,8 +462,7 @@ where
 <dt class="sig sig-object py" id="quapy.error.smooth">
 <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition">¶</a></dt>
 <dd><p>Smooths a prevalence distribution with <span class="math notranslate nohighlight">\(\epsilon\)</span> (<cite>eps</cite>) as:
-<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+
+<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
 \displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
@ -642,7 +601,7 @@ convenient or not. Set to False to deactivate.</p></li>
 </div>
 <span class="target" id="module-quapy.protocol"></span><dl class="py class">
 <dt class="sig sig-object py" id="quapy.protocol.APP">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sanity_check</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition">¶</a></dt>
+<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
 <p>Implementation of the artificial prevalence protocol (APP).
 The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
@ -662,8 +621,6 @@ grid (default is 21)</p></li>
 <li><p><strong>smooth_limits_epsilon</strong> – the quantity to add and subtract to the limits 0 and 1</p></li>
 <li><p><strong>random_state</strong> – allows replicating samples across runs (default 0, meaning that the sequence of samples
 will be the same every time the protocol is called)</p></li>
 <li><p><strong>sanity_check</strong> – int, raises an exception warning the user that the number of examples to be generated exceed
 this number; set to None for skipping this check</p></li>
 <li><p><strong>return_type</strong> – set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
 to “labelled_collection” to get instead instances of LabelledCollection</p></li>
 </ul>
@ -1862,7 +1819,6 @@ this function is invoked, it loads the pickled resource. Example:</p>
 </section>
 <section id="module-quapy">
 <span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy" title="Permalink to this heading">¶</a></h2>
 <p>QuaPy module for quantification</p>
 </section>
 </section>
--- a/docs/build/html/quapy.method.html
+++ b/docs/build/html/quapy.method.html
@ -1064,6 +1064,11 @@ validation data, or as an integer, indicating that the misclassification rates s
 <span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.method.aggregative.cross_generate_predictions_depr">
 <span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions_depr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions_depr" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
 <dl class="py function">
 <dt class="sig sig-object py" id="quapy.method.aggregative.newELM">
 <span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">newELM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'01'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.newELM" title="Permalink to this definition">¶</a></dt>
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
--- a/examples/uci_experiments.py
+++ b/examples/uci_experiments.py
@ -1,152 +0,0 @@
 from copy import deepcopy
 import quapy as qp
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.linear_model import LogisticRegression
 from quapy.classification.methods import LowRankLogisticRegression
 from quapy.method.meta import QuaNet
 from quapy.protocol import APP
 from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, HDy, newSVMAE
 from quapy.method.meta import EHDy
 import numpy as np
 import os
 import pickle
 import itertools
 import argparse
 import torch
 import shutil
 # Number of workers handed to qp.util.parallel for the CPU experiments (see the main block);
 # -1 is presumably the joblib convention for "use all cores" -- confirm against qp.util.parallel.
 N_JOBS = -1
 # Number of workers handed to qp.util.parallel for the QuaNet (CUDA) experiments.
 CUDA_N_JOBS = 2
 # NOTE(review): not referenced anywhere in the visible part of this script -- confirm it is still needed.
 ENSEMBLE_N_JOBS = -1
 # Sample size registered in QuaPy's global environment for the whole script.
 qp.environ['SAMPLE_SIZE'] = 100
 def newLR():
    """
    Creates a new, unfitted logistic regression classifier.
    :return: a `LogisticRegression` configured with the lbfgs solver, at most 1000 iterations and `n_jobs=-1`
    """
    lr_settings = dict(max_iter=1000, solver='lbfgs', n_jobs=-1)
    return LogisticRegression(**lr_settings)
 def calibratedLR():
    """
    Creates a new, unfitted logistic regression classifier wrapped in a `CalibratedClassifierCV`.
    :return: a `CalibratedClassifierCV` whose base estimator uses the lbfgs solver, at most 1000 iterations
        and `n_jobs=-1`
    """
    base_classifier = LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
    return CalibratedClassifierCV(base_classifier)
 # Seven values of C, log-spaced from 1e-3 to 1e3, shared by both grids below.
 __C_range = np.logspace(-3, 3, 7)
 # Grid explored for the quantifiers that are built on top of newLR() in quantification_models().
 lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']}
 # Grid explored for the SVMperf-based quantifier (newSVMAE), which only tunes C.
 svmperf_params = {'classifier__C': __C_range}
 def quantification_models():
    """
    Generates the CPU-based methods to be tested.
    :return: yields tuples `(method_name, quantifier, param_grid)`; every quantifier is built on a freshly
        created classifier
    """
    # methods that only differ in the quantifier class wrapped around a logistic regression
    lr_based = [('cc', CC), ('acc', ACC), ('pcc', PCC), ('pacc', PACC), ('MAX', MAX), ('MS', MS), ('MS2', MS2)]
    for method_name, quantifier_class in lr_based:
        yield method_name, quantifier_class(newLR()), lr_params
    # methods needing specific construction arguments or a specific grid
    yield 'sldc', EMQ(newLR(), recalib='platt'), lr_params
    yield 'svmmae', newSVMAE(), svmperf_params
    yield 'hdy', HDy(newLR()), lr_params
 def quantification_cuda_models():
    """
    Generates the neural method (QuaNet), which runs on the GPU when one is available and on the CPU otherwise.
    The checkpoint directory is read from the module-level `args` parsed in the main block.
    :return: yields one tuple `(method_name, quantifier, param_grid)`
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f'Running QuaNet in {device}')
    quanet = QuaNet(LowRankLogisticRegression(), checkpointdir=args.checkpointdir, device=device)
    yield 'quanet', quanet, lr_params
 def evaluate_experiment(true_prevalences, estim_prevalences):
    """
    Prints the MAE and MRAE between the true and the estimated prevalence values.
    :param true_prevalences: the true prevalence values
    :param estim_prevalences: the prevalence values estimated by the method
    """
    header = '\nEvaluation Metrics:\n' + '=' * 22
    print(header)
    for measure in (qp.error.mae, qp.error.mrae):
        score = measure(true_prevalences, estim_prevalences)
        print(f'\t{measure.__name__}={score:.4f}')
    print()
 def result_path(path, dataset_name, model_name, run, optim_loss):
    """
    Composes the path of the pickle file holding the results of one experiment.
    :param path: directory where the results are stored
    :param dataset_name: name of the dataset
    :param model_name: name of the method
    :param run: index of the run (fold)
    :param optim_loss: name of the loss optimized in model selection
    :return: `path` joined with `<dataset>-<model>-run<run>-<loss>.pkl`
    """
    filename = f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl'
    return os.path.join(path, filename)
 def is_already_computed(dataset_name, model_name, run, optim_loss):
    """
    Tells whether the result file of an experiment already exists in the results directory (`args.results`).
    :return: True if the file returned by `result_path` exists, False otherwise
    """
    target = result_path(args.results, dataset_name, model_name, run, optim_loss)
    return os.path.exists(target)
 def save_results(dataset_name, model_name, run, optim_loss, *results):
    """
    Pickles the results of an experiment into the file returned by `result_path` (inside `args.results`),
    creating the parent directory first.
    :param results: the objects to store; they are dumped together as one tuple
    """
    destination = result_path(args.results, dataset_name, model_name, run, optim_loss)
    qp.util.create_parent_dir(destination)
    payload = tuple(results)
    with open(destination, 'wb') as fout:
        pickle.dump(payload, fout, pickle.HIGHEST_PROTOCOL)
 def run(experiment):
    """
    Runs one experiment, i.e., the 5-fold cross-validation of one method on one dataset for one loss.
    Folds whose result file already exists are skipped. For the rest, the method is tuned (when a grid of
    hyperparameters is given) or simply trained, then evaluated on the test fold with the APP protocol,
    and the outcomes are stored with `save_results`.
    :param experiment: a tuple `(optim_loss, dataset_name, (model_name, model, hyperparams))`; set
        `hyperparams` to None to skip model selection
    """
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
    # these datasets are left out of the experiments
    if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return
    collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
    for fold, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=1)):
        if is_already_computed(dataset_name, model_name, run=fold, optim_loss=optim_loss):
            print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} run={fold+1}/5 already computed.')
            continue
        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={fold+1}/5')
        # model selection (hyperparameter optimization for a quantification-oriented loss)
        train, test = data.train_test
        train, val = train.split_stratified()
        if hyperparams is not None:
            # every fold starts from a copy of the model received as argument, so that nothing learned
            # in a previous fold is carried over to this one
            model_selection = qp.model_selection.GridSearchQ(
                deepcopy(model),
                param_grid=hyperparams,
                protocol=APP(val, n_prevalences=21, repeats=25),
                error=optim_loss,
                refit=True,
                timeout=60*60,
                verbose=True
            )
            # the candidates are trained on `train` only: `val` generates the validation samples and must
            # remain unseen during training, otherwise the scores guiding the selection are optimistic.
            # NOTE(review): refit=True is expected to retrain the winner on training+validation data --
            # confirm against GridSearchQ
            model_selection.fit(train)
            fold_model = model_selection.best_model()
            best_params = model_selection.best_params_
        else:
            fold_model = model
            fold_model.fit(data.training)
            best_params = {}
        # model evaluation
        true_prevalences, estim_prevalences = qp.evaluation.prediction(
            fold_model,
            protocol=APP(test, n_prevalences=21, repeats=100)
        )
        test_true_prevalence = data.test.prevalence()
        evaluate_experiment(true_prevalences, estim_prevalences)
        save_results(dataset_name, model_name, fold, optim_loss,
                     true_prevalences, estim_prevalences,
                     data.training.prevalence(), test_true_prevalence,
                     best_params)
 if __name__ == '__main__':
    # Command-line entry point: parses the paths, then runs all (loss, dataset, method) combinations,
    # first for the CPU methods and then for the CUDA ones.
    # The description used to mention the Twitter sentiment experiments, while this script runs on UCI datasets.
    parser = argparse.ArgumentParser(description='Run experiments for quantification on UCI datasets')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification',
                        help='path to the directory with svmperf')
    parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
                        help='path to the directory where to dump QuaNet checkpoints')
    # `args` is module-level on purpose: the helper functions above read it
    args = parser.parse_args()
    print(f'Result folder: {args.results}')
    np.random.seed(0)
    qp.environ['SVMPERF_HOME'] = args.svmperfpath
    optim_losses = ['mae']
    datasets = qp.datasets.UCI_DATASETS
    # itertools.product exhausts the generator of models when it is created, so all methods are
    # instantiated once and combined with every loss and dataset
    models = quantification_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
    models = quantification_cuda_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)
    # the QuaNet checkpoints are no longer needed once the experiments are over
    shutil.rmtree(args.checkpointdir, ignore_errors=True)
--- a/laboratory/custom_vectorizers.py
+++ b/laboratory/custom_vectorizers.py
@ -1,244 +0,0 @@
 from scipy.sparse import csc_matrix, csr_matrix
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
 import numpy as np
 from joblib import Parallel, delayed
 import sklearn
 import math
 from scipy.stats import t
 class ContTable:
    """
    Four-cell contingency table for a (category, feature) pair. The cells are `tp`, `fn` (documents of the
    category with and without the feature) and `fp`, `tn` (documents outside the category with and without
    the feature). The `p_*` methods divide by the total count and hence fail on an empty table.
    """
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
        self.tp, self.tn, self.fp, self.fn = tp, tn, fp, fn
    def get_d(self):
        # total number of documents
        return self.tp + self.tn + self.fp + self.fn
    def get_c(self):
        # documents belonging to the category
        return self.tp + self.fn
    def get_not_c(self):
        # documents not belonging to the category
        return self.tn + self.fp
    def get_f(self):
        # documents containing the feature
        return self.tp + self.fp
    def get_not_f(self):
        # documents not containing the feature
        return self.tn + self.fn
    def p_c(self):
        return (1.0 * self.get_c()) / self.get_d()
    def p_not_c(self):
        return 1.0 - self.p_c()
    def p_f(self):
        return (1.0 * self.get_f()) / self.get_d()
    def p_not_f(self):
        return 1.0 - self.p_f()
    def p_tp(self):
        return (1.0 * self.tp) / self.get_d()
    def p_tn(self):
        return (1.0 * self.tn) / self.get_d()
    def p_fp(self):
        return (1.0 * self.fp) / self.get_d()
    def p_fn(self):
        return (1.0 * self.fn) / self.get_d()
    def tpr(self):
        # true positive rate; defined as 0 when the category is empty
        positives = 1.0 * self.get_c()
        if positives > 0.0:
            return self.tp / positives
        return 0.0
    def fpr(self):
        # false positive rate; defined as 0 when there are no documents outside the category
        negatives = 1.0 * self.get_not_c()
        if negatives > 0.0:
            return self.fp / negatives
        return 0.0
 def __ig_factor(p_tc, p_t, p_c):
    """
    Computes one term of the information gain, `p_tc * log2(p_tc / (p_t * p_c))`.
    :param p_tc: joint probability
    :param p_t: first marginal probability
    :param p_c: second marginal probability
    :return: the term, or 0.0 when the joint probability or the product of the marginals is zero
    """
    den = p_t * p_c
    if den == 0.0 or p_tc == 0:
        return 0.0
    return p_tc * math.log(p_tc / den, 2)
 def information_gain(cell):
    """
    Information gain of a contingency table: the sum of the four terms (one per cell) given by `__ig_factor`.
    :param cell: a `ContTable`
    :return: the information gain
    """
    gain = __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
    gain = gain + __ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c())
    gain = gain + __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c())
    gain = gain + __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
    return gain
 def squared_information_gain(cell):
    """
    :param cell: a `ContTable`
    :return: the square of the information gain of `cell`
    """
    gain = information_gain(cell)
    return gain ** 2
 def posneg_information_gain(cell):
    """
    Signed information gain: the sign is negative when the true positive rate is lower than the false
    positive rate.
    :param cell: a `ContTable`
    :return: the information gain of `cell`, negated if `tpr < fpr`
    """
    gain = information_gain(cell)
    return -gain if cell.tpr() < cell.fpr() else gain
 def pos_information_gain(cell):
    """
    Information gain restricted to the features whose true positive rate is not lower than the false
    positive rate.
    :param cell: a `ContTable`
    :return: 0 if `tpr < fpr`, the information gain of `cell` otherwise
    """
    if cell.tpr() < cell.fpr():
        return 0
    return information_gain(cell)
 def pointwise_mutual_information(cell):
    """
    :param cell: a `ContTable`
    :return: the `__ig_factor` term of the true-positive cell, i.e., the one computed from `p_tp`, `p_f`
        and `p_c`
    """
    joint = cell.p_tp()
    p_feature = cell.p_f()
    p_category = cell.p_c()
    return __ig_factor(joint, p_feature, p_category)
 def gss(cell):
    """
    :param cell: a `ContTable`
    :return: `p_tp * p_tn - p_fp * p_fn`
    """
    agreement = cell.p_tp() * cell.p_tn()
    disagreement = cell.p_fp() * cell.p_fn()
    return agreement - disagreement
 def chi_square(cell):
    """
    :param cell: a `ContTable`
    :return: the squared `gss` score divided by the product of the four marginal probabilities, or 0.0 if
        that product is zero
    """
    marginals = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
    if marginals == 0.0:
        return 0.0
    return gss(cell) ** 2 / marginals
 def conf_interval(xt, n):
    """
    Estimates a proportion along with the amplitude of its 95% confidence interval.
    :param xt: number of positive observations
    :param n: total number of observations
    :return: a tuple `(p, amplitude)` with the adjusted proportion and the amplitude of the interval
    """
    # squared critical value: a constant from the normal distribution for large samples,
    # and from Student's t distribution (at least 1 degree of freedom) otherwise
    z2 = 3.84145882069 if n > 30 else t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2  # norm.ppf(0.5+0.95/2.0)**2
    half_z2 = 0.5 * z2
    adjusted_n = n + z2
    p = (xt + half_z2) / adjusted_n
    # NOTE(review): the amplitude is scaled by z2/2 rather than by z -- confirm against the reference formula
    amplitude = half_z2 * math.sqrt((p * (1.0 - p)) / adjusted_n)
    return p, amplitude
 def strength(minPosRelFreq, minPos, maxNeg):
    """
    :param minPosRelFreq: relative frequency of the lower bound of the positive interval
    :param minPos: lower bound of the positive interval
    :param maxNeg: upper bound of the negative interval
    :return: `log2(2 * minPosRelFreq)` if `minPos > maxNeg`, and 0.0 otherwise
    """
    return math.log(2.0 * minPosRelFreq, 2.0) if minPos > maxNeg else 0.0
 # Set cancel_features=True to allow some features to be weighted as 0 (as in the original article);
 # note, however, that in some extremely imbalanced datasets this caused all documents to be 0.
 def conf_weight(cell, cancel_features=False):
    """
    Weights a feature by comparing the confidence interval of its frequency among the documents of the
    category against the confidence interval of its frequency among the remaining documents.
    :param cell: a `ContTable`
    :param cancel_features: if False (default), a feature with zero strength gets the weight 1e-20 instead of 0
    :return: the strength of the feature (see `strength`), or 1e-20 in the case described above
    """
    pos_p, pos_amp = conf_interval(cell.tp, cell.get_c())
    neg_p, neg_amp = conf_interval(cell.fp, cell.get_not_c())
    # most pessimistic estimate for the category and most optimistic one for its complement
    min_pos = pos_p - pos_amp
    max_neg = neg_p + neg_amp
    den = min_pos + max_neg
    if den == 0:
        den = 1  # avoids dividing by zero
    minpos_relfreq = min_pos / den
    str_tplus = strength(minpos_relfreq, min_pos, max_neg)
    if not cancel_features and str_tplus == 0:
        return 1e-20
    return str_tplus
 def get_tsr_matrix(cell_matrix, tsr_score_funtion):
    """
    Applies a scoring function to every element of a two-dimensional array.
    :param cell_matrix: two-dimensional array (indexed as `cell_matrix[c, f]`) with one element per
        (category, feature) pair
    :param tsr_score_funtion: function computing the score of one element
    :return: a np.ndarray with the scores, with the same number of rows and columns as `cell_matrix`
    """
    n_categories = len(cell_matrix)
    n_features = len(cell_matrix[0])
    score_rows = []
    for c in range(n_categories):
        score_rows.append([tsr_score_funtion(cell_matrix[c, f]) for f in range(n_features)])
    return np.array(score_rows)
 def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
    """
    Builds the contingency table of a (category, feature) pair out of sets of document indexes.
    :param positive_document_indexes: set with the indexes of the documents belonging to the category
    :param feature_document_indexes: set with the indexes of the documents containing the feature
    :param nD: total number of documents
    :return: a `ContTable`
    """
    in_both = len(positive_document_indexes & feature_document_indexes)
    only_feature = len(feature_document_indexes - positive_document_indexes)
    only_category = len(positive_document_indexes - feature_document_indexes)
    # all the documents not accounted for so far have neither the category nor the feature
    in_none = nD - (in_both + only_feature + only_category)
    return ContTable(tp=in_both, tn=in_none, fp=only_feature, fn=only_category)
 def category_tables(feature_sets, category_sets, c, nD, nF):
    """
    Computes the contingency tables of all the features with respect to one category.
    :param feature_sets: list with, for each feature, the set of indexes of the documents containing it
    :param category_sets: list with, for each category, the set of indexes of the documents belonging to it
    :param c: index of the category
    :param nD: total number of documents
    :param nF: number of features
    :return: a list with `nF` instances of `ContTable`
    """
    positives = category_sets[c]
    tables = []
    for f in range(nF):
        tables.append(feature_label_contingency_table(positives, feature_sets[f], nD))
    return tables
 def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
    """
    Builds the nC x nF supervised matrix M in which Mcf is the 4-cell contingency table of feature f
    and class c. The original author reports an efficiency of O(nF x nC x log(S)), with S the sparse factor.
    :param coocurrence_matrix: document-by-feature matrix of shape `(nD, nF)`
    :param label_matrix: document-by-class matrix of shape `(nD, nC)`
    :param n_jobs: number of threads computing the tables, one class per task
    :return: a np.ndarray built from the list (one entry per class) of lists (one `ContTable` per feature)
    :raises ValueError: if both matrices disagree in the number of rows
    """
    n_docs, n_feats = coocurrence_matrix.shape
    n_docs_labels, n_classes = label_matrix.shape
    if n_docs != n_docs_labels:
        raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
                         (coocurrence_matrix.shape,label_matrix.shape))
    def rows_with_nonzero(matrix, col):
        # indexes of the rows with a non-zero value in the column
        return set(matrix[:, col].nonzero()[0])
    # the column-oriented format is the one used for slicing columns out of a csr matrix
    if isinstance(coocurrence_matrix, csr_matrix):
        coocurrence_matrix = csc_matrix(coocurrence_matrix)
    feature_sets = [rows_with_nonzero(coocurrence_matrix, f) for f in range(n_feats)]
    category_sets = [rows_with_nonzero(label_matrix, c) for c in range(n_classes)]
    tasks = (delayed(category_tables)(feature_sets, category_sets, c, n_docs, n_feats) for c in range(n_classes))
    runner = Parallel(n_jobs=n_jobs, backend="threading")
    return np.array(runner(tasks))
 class TSRweighting(BaseEstimator,TransformerMixin):
    """
    Supervised term weighting based on any Term Selection Reduction (TSR) function (e.g., information gain,
    chi-square, etc.) or, more generally, on any function computable on the 4-cell contingency table of
    a category-feature pair. Documents are represented by their term frequencies multiplied by one global
    score per feature.
    The supervised_4cell_matrix (a CxF matrix with the 4-cell contingency tables of every category-feature
    pair) can be pre-computed (e.g., during the feature selection phase) and passed as an argument;
    otherwise, it is computed (and kept in the same attribute) when fitting.
    When C>1, i.e., in multiclass scenarios, a global_policy determines the single score of each feature.
    Accepted policies are "max" (the maximum score across categories), "ave" and "wave" (the average, or
    the average weighted by the class prevalence, across categories), and "sum" (the sum of the scores of
    all categories).
    """
    def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
        valid_policies = ('max', 'ave', 'wave', 'sum')
        if global_policy not in valid_policies:
            raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
        self.tsr_function = tsr_function
        self.global_policy = global_policy
        self.supervised_4cell_matrix = supervised_4cell_matrix
        self.sublinear_tf = sublinear_tf
        self.norm = norm
        self.min_df = min_df
        self.n_jobs = n_jobs
    def fit(self, X, y):
        """
        Learns the vocabulary and the global score of each feature.
        :param X: the raw documents
        :param y: array of labels; a one-dimensional array is treated as a matrix with a single column
        :return: self
        """
        self.count_vectorizer = CountVectorizer(min_df=self.min_df)
        X = self.count_vectorizer.fit_transform(X)
        tf_transformer = TfidfTransformer(
            norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf)
        self.tf_vectorizer = tf_transformer.fit(X)
        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)
        nD, nC = y.shape
        nF = len(self.tf_vectorizer.get_feature_names_out())
        if self.supervised_4cell_matrix is not None:
            # a pre-computed matrix is only accepted if it matches the data
            if self.supervised_4cell_matrix.shape != (nC, nF):
                raise ValueError("Shape of supervised information matrix is inconsistent with X and y")
        else:
            self.supervised_4cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
        tsr_matrix = get_tsr_matrix(self.supervised_4cell_matrix, self.tsr_function)
        # collapses the scores of all categories into one score per feature
        policy = self.global_policy
        if policy == 'max':
            self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
        elif policy == 'sum':
            self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
        elif policy == 'ave':
            self.global_tsr_vector = np.average(tsr_matrix, axis=0)
        elif policy == 'wave':
            category_prevalences = [sum(y[:,c])*1.0/nD for c in range(nC)]
            self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
        return self
    def fit_transform(self, X, y):
        """
        Fits the weighting function and transforms the same documents.
        """
        fitted = self.fit(X, y)
        return fitted.transform(X)
    def transform(self, X):
        """
        Represents the documents by their term frequencies multiplied by the global feature scores.
        :param X: the raw documents
        :return: a csr_matrix with the (optionally normalized) weighted documents
        :raises NameError: if called before `fit`
        """
        if not hasattr(self, 'global_tsr_vector'):
            raise NameError('TSRweighting: transform method called before fit.')
        counts = self.count_vectorizer.transform(X)
        tf_X = self.tf_vectorizer.transform(counts).toarray()
        weighted_X = np.multiply(tf_X, self.global_tsr_vector)
        apply_norm = self.norm is not None and self.norm!='none'
        if apply_norm:
            weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
        return csr_matrix(weighted_X)
--- a/laboratory/dataset_adapter.py
+++ b/laboratory/dataset_adapter.py
--- a/laboratory/main.py
+++ b/laboratory/main.py
--- a/laboratory/method_dxs.py
+++ b/laboratory/method_dxs.py
@ -1,148 +0,0 @@
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
 from data import LabelledCollection
 import numpy as np
 from laboratory.custom_vectorizers import *
 from protocol import APP
 from quapy.method.aggregative import _get_divergence, HDy, DistributionMatching
 from quapy.method.base import BaseQuantifier
 from scipy import optimize
 import pandas as pd
 # TODO: explore the bernoulli (term presence/absence) variant
 # TODO: explore the multinomial (term frequency) variant
 # TODO: explore the multinomial + length normalization variant
 # TODO: consolidate the TSR variant (e.g., using information gain);
 #   - works better with the idf?
 #   - works better with length normalization?
 #   - etc
 class DxS(BaseQuantifier):
    """
    Distribution matching on the feature space: each class is represented by the distribution of the
    feature mass of its training instances, and the prevalence values are those for which the mixture of
    the class distributions is closest, in terms of a divergence, to the distribution of the test sample.
    :param vectorizer: optional transformer (with `fit_transform` and `transform`) applied to the
        instances; if None, the instances are used as they are
    :param divergence: the divergence to minimize, as accepted by `_get_divergence`
    """
    def __init__(self, vectorizer=None, divergence='topsoe'):
        self.vectorizer = vectorizer
        self.divergence = divergence
    def __as_distribution(self, instances):
        # column-wise sums normalized by the total sum, as a flat array
        column_mass = instances.sum(axis=0)
        return np.asarray(column_mass / instances.sum()).flatten()
    def fit(self, data: LabelledCollection):
        """
        Computes one distribution per class out of the training data, after fitting the vectorizer (if any).
        :param data: the training set
        :return: self
        """
        text_instances, labels = data.Xy
        if self.vectorizer is not None:
            text_instances = self.vectorizer.fit_transform(text_instances, y=labels)
        per_class = [self.__as_distribution(text_instances[labels == class_i]) for class_i in data.classes_]
        self.validation_distribution = np.asarray(per_class)
        return self
    def quantify(self, text_instances):
        """
        Searches for the prevalence values whose mixture of class distributions best matches the
        distribution of the sample.
        :param text_instances: the instances in the sample
        :return: the vector found by the optimizer, with one value per class
        """
        if self.vectorizer is not None:
            text_instances = self.vectorizer.transform(text_instances)
        test_distribution = self.__as_distribution(text_instances)
        divergence = _get_divergence(self.divergence)
        n_classes, _ = self.validation_distribution.shape
        def match(prev):
            # divergence between the test distribution and the mixture weighted by prev
            weights = np.expand_dims(prev, axis=0)
            mixture_distribution = (weights @ self.validation_distribution).flatten()
            return divergence(test_distribution, mixture_distribution)
        # the search starts at the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
        # the solutions are restricted to the unit-simplex: values in [0,1] that sum up to 1
        bounds = tuple((0, 1) for _ in range(n_classes))
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
        solution = optimize.minimize(
            match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return solution.x
 if __name__ == '__main__':
    # Compares DxS, under several document representations, against HDy and DistributionMatching on the
    # reviews datasets; the errors are written to a tab-separated file and finally shown as a pivot table.
    qp.environ['SAMPLE_SIZE'] = 250
    qp.environ['N_JOBS'] = -1
    min_df = 10
    # dataset = 'imdb'
    repeats = 10
    error = 'mae'
    div = 'HD'
    # generates tuples (dataset, method, method_name)
    # (the dataset is needed for methods that process the dataset differently)
    def gen_methods():
        for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
            # DxS receives the documents (tfidf=False) and applies its own vectorizer
            data = qp.datasets.fetch_reviews(dataset, tfidf=False)
            dxs_variants = [
                ('DxS-Bernoulli', CountVectorizer(min_df=min_df, binary=True)),
                ('DxS-multinomial', CountVectorizer(min_df=min_df, binary=False)),
                ('DxS-TF', TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)),
                ('DxS-logTF', TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)),
                ('DxS-TFIDF', TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)),
                ('DxS-TFIDF-l2', TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')),
                ('DxS-TFTSR-l2', TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')),
            ]
            for variant_name, vectorizer in dxs_variants:
                yield data, DxS(divergence=div, vectorizer=vectorizer), variant_name
            # the baselines work on the tfidf representation of the dataset
            data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
            yield data, HDy(LogisticRegression()), 'HDy'
            for nbins, baseline_name in ((5, 'DM-5b'), (10, 'DM-10b')):
                yield data, DistributionMatching(LogisticRegression(), divergence=div, nbins=nbins), baseline_name
    result_path = 'results.csv'
    with open(result_path, 'wt') as fout:
        fout.write('Method\tDataset\tMAE\tMRAE\n')
        for data, quantifier, quant_name in gen_methods():
            quantifier.fit(data.training)
            protocol = APP(data.test, repeats=repeats)
            report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae','mrae'], verbose=True)
            means = report.mean()
            mae, mrae = means["mae"], means["mrae"]
            fout.write(f'{quant_name}\t{data.name}\t{mae:.5f}\t{mrae:.5f}\n')
    df = pd.read_csv(result_path, sep='\t')
    # print(df)
    pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
    print(pv)
--- a/laboratory/method_kdey.py
+++ b/laboratory/method_kdey.py
@ -1,168 +0,0 @@
 from typing import Union, Callable
 import numpy as np
 from sklearn.base import BaseEstimator
 from sklearn.linear_model import LogisticRegression
 import pandas as pd
 from sklearn.neighbors import KernelDensity
 import quapy as qp
 from data import LabelledCollection
 from protocol import APP, UPP
 from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
    DistributionMatching, _get_divergence
 import scipy
 from scipy import optimize
 class KDEy(AggregativeProbabilisticQuantifier):
    """
    Distribution matching on the posterior probabilities in which the distributions are modelled by
    kernel density estimates (KDE): one KDE is fit per class on the validation posteriors, and the
    prevalence values are those for which the mixture of the class-wise densities is closest, in terms of
    a divergence, to the density of the test posteriors.
    :param classifier: the classifier generating the posterior probabilities
    :param val_split: specification of the validation data (see :meth:`fit`)
    :param divergence: the divergence to minimize, as accepted by `_get_divergence`
    :param bandwidth_method: one of `KDEy.BANDWIDTH_METHOD`; the value is handed over to the KDE engine
    :param engine: the KDE implementation, one of `KDEy.ENGINE`
    :param n_jobs: number of parallel workers used for generating the validation posteriors
    """
    BANDWIDTH_METHOD = ['auto', 'scott', 'silverman']
    ENGINE = ['scipy', 'sklearn']
    def __init__(self, classifier: BaseEstimator, val_split=0.4, divergence: Union[str, Callable]='HD',
                 bandwidth_method='scott', engine='sklearn', n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.divergence = divergence
        self.bandwidth_method = bandwidth_method
        self.engine = engine
        self.n_jobs = n_jobs
        assert bandwidth_method in KDEy.BANDWIDTH_METHOD, f'unknown bandwidth_method, valid ones are {KDEy.BANDWIDTH_METHOD}'
        assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'
    def get_kde(self, posteriors):
        """
        Fits a kernel density estimate, with the engine of choice, on the posterior probabilities.
        :param posteriors: array with one row of posterior probabilities per instance
        :return: the fitted density, as returned by the engine
        """
        if self.engine == 'sklearn':
            kde = KernelDensity(bandwidth=self.bandwidth_method).fit(posteriors)
        elif self.engine == 'scipy':
            # scipy treats columns as datapoints, and needs the datapoints not to lie in a lower-dimensional
            # subspace, which requires removing the last dimension (which is constrained)
            datapoints = posteriors[:,:-1].T
            kde = scipy.stats.gaussian_kde(datapoints)
            kde.set_bandwidth(self.bandwidth_method)
        return kde
    def pdf(self, kde, posteriors):
        """
        Evaluates a density returned by :meth:`get_kde` on the posterior probabilities.
        :param kde: the density
        :param posteriors: array with one row of posterior probabilities per instance
        :return: the values of the density, one per instance
        """
        if self.engine == 'sklearn':
            # sklearn returns log-densities
            return np.exp(kde.score_samples(posteriors))
        elif self.engine == 'scipy':
            return kde(posteriors[:,:-1].T)
    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
        """
        Trains the classifier (if requested) and fits one density per class on the posterior probabilities
        generated for the validation data.
        NOTE(review): the posteriors of class `i` are selected as `posteriors[y == i]` for `i` in
        `range(n_classes)`, which assumes the labels are the integers 0..n_classes-1 -- confirm
        :param data: the training set
        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
        :param val_split: either a float in (0,1) indicating the proportion of training instances to use for
         validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
         indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
         to estimate the parameters; if None, the value given in the constructor is used
        :return: self
        """
        if val_split is None:
            val_split = self.val_split
        predictions = cross_generate_predictions(
            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
        )
        self.classifier, y, posteriors, classes, class_count = predictions
        self.val_densities = [self.get_kde(posteriors[y == cat]) for cat in range(data.n_classes)]
        self.val_posteriors = posteriors
        return self
    def val_pdf(self, prev):
        """
        Returns a function that computes the mixture model with the given prev as mixture factor
        :param prev: a prevalence vector, ndarray
        :return: a function that evaluates, on an array of posteriors, the mixture of the class-wise
            validation densities weighted by `prev`
        """
        def mixture(posteriors):
            return sum(prev_i * self.pdf(kde_i, posteriors) for kde_i, prev_i in zip(self.val_densities, prev))
        return mixture
    def aggregate(self, posteriors: np.ndarray):
        """
        Searches for the mixture model parameter (the sought prevalence values) that yields a validation
        density (the mixture) that best matches the test density, in terms of the divergence measure of
        choice. Both densities are evaluated on the test posteriors; the test density is a KDE fit on those
        same posteriors.
        :param posteriors: posterior probabilities of the instances in the sample
        :return: a vector of class prevalence estimates
        """
        test_density = self.get_kde(posteriors)
        test_likelihood = self.pdf(test_density, posteriors)
        divergence = _get_divergence(self.divergence)
        n_classes = len(self.val_densities)
        def match(prev):
            val_likelihood = self.val_pdf(prev)(posteriors)
            return divergence(val_likelihood, test_likelihood)
        # the search starts at the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
        # the solutions are restricted to the unit-simplex: values in [0,1] that sum up to 1
        bounds = tuple((0, 1) for _ in range(n_classes))
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
        solution = optimize.minimize(
            match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return solution.x
 if __name__ == '__main__':
    # Compares KDEy against DistributionMatching on the Twitter datasets; the errors are written to a
    # tab-separated file (flushed after every method) and finally shown as a pivot table.
    qp.environ['SAMPLE_SIZE'] = 100
    qp.environ['N_JOBS'] = -1
    div = 'HD'
    # generates tuples (dataset, method, method_name)
    # (the dataset is needed for methods that process the dataset differently)
    def gen_methods():
        for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)
            # kdey = KDEy(LogisticRegression(), divergence=div, bandwidth_method='scott')
            # yield data, kdey, f'KDEy-{div}-scott'
            yield (data,
                   KDEy(LogisticRegression(), divergence=div, bandwidth_method='silverman', engine='sklearn'),
                   f'KDEy-{div}-silverman')
            yield data, DistributionMatching(LogisticRegression(), divergence=div, nbins=5), f'DM-5b-{div}'
            # dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=10)
            # yield data, dm, f'DM-10b-{div}'
    result_path = 'results_kdey.csv'
    with open(result_path, 'wt') as fout:
        fout.write('Method\tDataset\tMAE\tMRAE\n')
        for data, quantifier, quant_name in gen_methods():
            quantifier.fit(data.training)
            protocol = UPP(data.test, repeats=100)
            report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae','mrae'], verbose=True)
            means = report.mean()
            mae, mrae = means["mae"], means["mrae"]
            fout.write(f'{quant_name}\t{data.name}\t{mae:.5f}\t{mrae:.5f}\n')
            # partial results remain available on disk while the experiments are running
            fout.flush()
    df = pd.read_csv(result_path, sep='\t')
    # print(df)
    # displays the whole table, without truncating rows or columns
    for display_option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(display_option, None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)
--- a/quapy/init.py
+++ b/quapy/init.py
@ -1,7 +1,6 @@
 """QuaPy module for quantification"""
 from quapy.data import datasets
 from . import error
 from . import data
 from quapy.data import datasets
 from . import functional
 # from . import method
 from . import evaluation
@ -26,8 +25,7 @@ environ = {
 def _get_njobs(n_jobs):
    """
-    If `n_jobs` is None, then it returns `environ['N_JOBS']`;
+    If `n_jobs` is None, then it returns `environ['N_JOBS']`; if otherwise, returns `n_jobs`.
    if otherwise, returns `n_jobs`.
    :param n_jobs: the number of `n_jobs` or None if not specified
    :return: int
@ -37,8 +35,7 @@ def _get_njobs(n_jobs):
 def _get_sample_size(sample_size):
    """
-    If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`;
+    If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; if otherwise, returns `sample_size`.
    if otherwise, returns `sample_size`.
    If none of these are set, then a ValueError exception is raised.
    :param sample_size: integer or None
@ -48,3 +45,6 @@ def _get_sample_size(sample_size):
    if sample_size is None:
        raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified')
    return sample_size
--- a/quapy/classification/methods.py
+++ b/quapy/classification/methods.py
@ -19,7 +19,7 @@ class LowRankLogisticRegression(BaseEstimator):
    def __init__(self, n_components=100, **kwargs):
        self.n_components = n_components
-        self.classifier = LogisticRegression(**kwargs)
+        self.learner = LogisticRegression(**kwargs)
    def get_params(self):
        """
@ -28,7 +28,7 @@ class LowRankLogisticRegression(BaseEstimator):
        :return: a dictionary with parameter names mapped to their values
        """
        params = {'n_components': self.n_components}
-        params.update(self.classifier.get_params())
+        params.update(self.learner.get_params())
        return params
    def set_params(self, **params):
@ -43,7 +43,7 @@ class LowRankLogisticRegression(BaseEstimator):
        if 'n_components' in params_:
            self.n_components = params_['n_components']
            del params_['n_components']
-        self.classifier.set_params(**params_)
+        self.learner.set_params(**params_)
    def fit(self, X, y):
        """
@ -59,8 +59,8 @@ class LowRankLogisticRegression(BaseEstimator):
        if nF > self.n_components:
            self.pca = TruncatedSVD(self.n_components).fit(X)
        X = self.transform(X)
-        self.classifier.fit(X, y)
+        self.learner.fit(X, y)
-        self.classes_ = self.classifier.classes_
+        self.classes_ = self.learner.classes_
        return self
    def predict(self, X):
@ -72,7 +72,7 @@ class LowRankLogisticRegression(BaseEstimator):
            instances in `X`
        """
        X = self.transform(X)
-        return self.classifier.predict(X)
+        return self.learner.predict(X)
    def predict_proba(self, X):
        """
@ -82,7 +82,7 @@ class LowRankLogisticRegression(BaseEstimator):
        :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
        """
        X = self.transform(X)
-        return self.classifier.predict_proba(X)
+        return self.learner.predict_proba(X)
    def transform(self, X):
        """
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@ -207,7 +207,7 @@ def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False
    return Dataset(*data.split_stratified(1 - test_split, random_state=0))
-def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
+def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> Dataset:
    """
    Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in
    `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
@ -223,7 +223,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
    >>> import quapy as qp
    >>> collection = qp.datasets.fetch_UCILabelledCollection("yeast")
-    >>> for data in qp.domains.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
+    >>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
    >>>     ...
    The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
@ -233,7 +233,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
        ~/quapy_data/ directory)
    :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
    :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
-    :return: a :class:`quapy.data.base.LabelledCollection` instance
+    :return: a :class:`quapy.data.base.Dataset` instance
    """
    assert dataset_name in UCI_DATASETS, \
--- a/quapy/error.py
+++ b/quapy/error.py
@ -1,13 +1,10 @@
-"""Implementation of error measures used for quantification"""
+import quapy as qp
 import numpy as np
 from sklearn.metrics import f1_score
 import quapy as qp
 def from_name(err_name):
-    """Gets an error function from its name. E.g., `from_name("mae")`
+    """Gets an error function from its name. E.g., `from_name("mae")` will return function :meth:`quapy.error.mae`
    will return function :meth:`quapy.error.mae`
    :param err_name: string, the error name
    :return: a callable implementing the requested error
@ -18,12 +15,10 @@ def from_name(err_name):
 def f1e(y_true, y_pred):
-    """F1 error: simply computes the error in terms of macro :math:`F_1`, i.e.,
+    """F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., :math:`1-F_1^M`,
-    :math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall,
+    where :math:`F_1` is the harmonic mean of precision and recall, defined as :math:`\\frac{2tp}{2tp+fp+fn}`,
-    defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing
+    with `tp`, `fp`, and `fn` standing for true positives, false positives, and false negatives, respectively.
-    for true positives, false positives, and false negatives, respectively.
+    `Macro` averaging means the :math:`F_1` is computed for each category independently, and then averaged.
    `Macro` averaging means the :math:`F_1` is computed for each category independently,
    and then averaged.
    :param y_true: array-like of true labels
    :param y_pred: array-like of predicted labels
@ -33,9 +28,8 @@ def f1e(y_true, y_pred):
 def acce(y_true, y_pred):
-    """Computes the error in terms of 1-accuracy. The accuracy is computed as
+    """Computes the error in terms of 1-accuracy. The accuracy is computed as :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with
-    :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing
+    `tp`, `fp`, `fn`, and `tn` standing for true positives, false positives, false negatives, and true negatives,
    for true positives, false positives, false negatives, and true negatives,
    respectively
    :param y_true: array-like of true labels
@ -49,8 +43,7 @@ def mae(prevs, prevs_hat):
    """Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
-    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
        prevalence values
    :return: mean absolute error
    """
    return ae(prevs, prevs_hat).mean()
@ -59,7 +52,7 @@ def mae(prevs, prevs_hat):
 def ae(prevs, prevs_hat):
    """Computes the absolute error between the two prevalence vectors.
     Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`  is computed as
-     :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,
+     :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,
     where :math:`\\mathcal{Y}` are the classes of interest.
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
@ -73,153 +66,129 @@ def ae(prevs, prevs_hat):
 def mse(prevs, prevs_hat):
    """Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
-    :param prevs: array-like of shape `(n_samples, n_classes,)` with the
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
-        true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the
        predicted prevalence values
    :return: mean squared error
    """
    return se(prevs, prevs_hat).mean()
-def se(prevs, prevs_hat):
+def se(p, p_hat):
    """Computes the squared error between the two prevalence vectors.
     Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}`  is computed as
-     :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,
+     :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`, where
     where
     :math:`\\mathcal{Y}` are the classes of interest.
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
    :return: squared error
    """
-    return ((prevs_hat - prevs) ** 2).mean(axis=-1)
+    return ((p_hat-p)**2).mean(axis=-1)
 def mkld(prevs, prevs_hat, eps=None):
-    """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
+    """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the sample pairs.
-    sample pairs. The distributions are smoothed using the `eps` factor
+    The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    (see :meth:`quapy.error.smooth`).
-    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
-        prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
-    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+    :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
-        prevalence values
+        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
-    :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
+        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
        zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
        If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
        (which has thus to be set beforehand).
    :return: mean Kullback-Leibler divergence
    """
    return kld(prevs, prevs_hat, eps).mean()
-def kld(prevs, prevs_hat, eps=None):
+def kld(p, p_hat, eps=None):
    """Computes the Kullback-Leibler divergence between the two prevalence distributions.
-     Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
+     Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` is computed as
-     is computed as
+     :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`, where
     :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=
     \\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,
     where :math:`\\mathcal{Y}` are the classes of interest.
     The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
    :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
        zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
        If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
        (which has thus to be set beforehand).
    :return: Kullback-Leibler divergence between the two distributions
    """
    eps = __check_eps(eps)
    smooth_prevs = prevs + eps
    smooth_prevs_hat = prevs_hat + eps
    return (smooth_prevs*np.log(smooth_prevs/smooth_prevs_hat)).sum(axis=-1)
 def mnkld(prevs, prevs_hat, eps=None):
    """Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)
    across the sample pairs. The distributions are smoothed using the `eps` factor
    (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
        prevalence values
    :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain
        zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
        If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
        (which has thus to be set beforehand).
    :return: mean Normalized Kullback-Leibler divergence
    """
    return nkld(prevs, prevs_hat, eps).mean()
 def nkld(prevs, prevs_hat, eps=None):
    """Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
     Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and
     :math:`\\hat{p}` is computed as
     :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,
     where
     :math:`\\mathcal{Y}` are the classes of interest.
     The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
-    :param eps: smoothing factor. NKLD is not defined in cases in which the distributions
+    :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
-        contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample
+        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
-        size. If `eps=None`, the sample size will be taken from the environment variable
+        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
-        `SAMPLE_SIZE` (which has thus to be set beforehand).
+    :return: Kullback-Leibler divergence between the two distributions
    """
    eps = __check_eps(eps)
    sp = p+eps
    sp_hat = p_hat + eps
    return (sp*np.log(sp/sp_hat)).sum(axis=-1)
 def mnkld(prevs, prevs_hat, eps=None):
    """Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`) across the sample pairs.
    The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
    :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: mean Normalized Kullback-Leibler divergence
    """
    return nkld(prevs, prevs_hat, eps).mean()
 def nkld(p, p_hat, eps=None):
    """Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
     Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
     is computed as :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`, where
     :math:`\\mathcal{Y}` are the classes of interest.
     The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
    :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: Normalized Kullback-Leibler divergence between the two distributions
    """
-    ekld = np.exp(kld(prevs, prevs_hat, eps))
+    ekld = np.exp(kld(p, p_hat, eps))
    return 2. * ekld / (1 + ekld) - 1.
-def mrae(prevs, prevs_hat, eps=None):
+def mrae(p, p_hat, eps=None):
-    """Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across
+    """Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across the sample pairs.
-    the sample pairs. The distributions are smoothed using the `eps` factor (see
+    The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :meth:`quapy.error.smooth`).
-    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
-        prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
-    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+    :param eps: smoothing factor. `mrae` is not defined in cases in which the true distribution contains zeros; `eps`
-        prevalence values
+        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
-    :param eps: smoothing factor. `mrae` is not defined in cases in which the true
+        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
        distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,
        with :math:`T` the sample size. If `eps=None`, the sample size will be taken from
        the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: mean relative absolute error
    """
-    return rae(prevs, prevs_hat, eps).mean()
+    return rae(p, p_hat, eps).mean()
-def rae(prevs, prevs_hat, eps=None):
+def rae(p, p_hat, eps=None):
    """Computes the absolute relative error between the two prevalence vectors.
-     Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
+     Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`  is computed as
-     is computed as
+     :math:`RAE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
     :math:`RAE(p,\\hat{p})=
     \\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
     where :math:`\\mathcal{Y}` are the classes of interest.
     The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
-    :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution
+    :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution contains zeros; `eps`
-        contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
+        is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
-        sample size. If `eps=None`, the sample size will be taken from the environment variable
+        will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
        `SAMPLE_SIZE` (which has thus to be set beforehand).
    :return: relative absolute error
    """
    eps = __check_eps(eps)
-    prevs = smooth(prevs, eps)
+    p = smooth(p, eps)
-    prevs_hat = smooth(prevs_hat, eps)
+    p_hat = smooth(p_hat, eps)
-    return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
+    return (abs(p-p_hat)/p).mean(axis=-1)
 def smooth(prevs, eps):
-    """ Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
+    """ Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
-    :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+
+    :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
    \\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
    :param eps: smoothing factor
@ -231,10 +200,12 @@ def smooth(prevs, eps):
 def __check_eps(eps=None):
    if eps is None:
        import quapy as qp
        sample_size = qp.environ['SAMPLE_SIZE']
        if sample_size is None:
            raise ValueError('eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set')
-        eps = 1. / (2. * sample_size)
+        else:
            eps = 1. / (2. * sample_size)
    return eps
@ -246,8 +217,7 @@ CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
 QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
 QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
 QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH}
-ERROR_NAMES = \
+ERROR_NAMES = CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
    CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
 f1_error = f1e
 acc_error = acce
@ -255,3 +225,4 @@ mean_absolute_error = mae
 absolute_error = ae
 mean_relative_absolute_error = mrae
 relative_absolute_error = rae
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@ -444,28 +444,24 @@ class EMQ(AggregativeProbabilisticQuantifier):
    def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None):
        self.classifier = classifier
        self.non_calibrated = classifier
        self.exact_train_prev = exact_train_prev
        self.recalib = recalib
    def fit(self, data: LabelledCollection, fit_classifier=True):
        if self.recalib is not None:
            if self.recalib == 'nbvs':
-                self.classifier = NBVSCalibration(self.non_calibrated)
+                self.classifier = NBVSCalibration(self.classifier)
            elif self.recalib == 'bcts':
-                self.classifier = BCTSCalibration(self.non_calibrated)
+                self.classifier = BCTSCalibration(self.classifier)
            elif self.recalib == 'ts':
-                self.classifier = TSCalibration(self.non_calibrated)
+                self.classifier = TSCalibration(self.classifier)
            elif self.recalib == 'vs':
-                self.classifier = VSCalibration(self.non_calibrated)
+                self.classifier = VSCalibration(self.classifier)
            elif self.recalib == 'platt':
                self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False)
            else:
                raise ValueError('invalid param argument for recalibration method; available ones are '
                                 '"nbvs", "bcts", "ts", and "vs".')
            self.recalib = None
        else:
            self.classifier = self.non_calibrated
        self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True)
        if self.exact_train_prev:
            self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
@ -770,9 +766,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
        """
        Trains the classifier (if requested) and generates the validation distributions out of the training data.
        The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
-        channels (a channel is a description, in form of a histogram, of a specific class -- there are as many channels
+        channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
        as classes, although in the binary case one can use only one channel, since the other one is constrained),
        and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
        are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
        distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
        is the fraction of instances with a value in the `k`-th bin.
@ -821,7 +815,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
        # solutions are bounded to those contained in the unit-simplex
-        bounds = tuple((0, 1) for _ in range(n_classes))  # values in [0,1]
+        bounds = tuple((0, 1) for x in range(n_classes))  # values in [0,1]
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x
--- a/quapy/method/neural.py
+++ b/quapy/method/neural.py
@ -9,7 +9,6 @@ from torch.nn.functional import relu
 from quapy.protocol import UPP
 from quapy.method.aggregative import *
 from quapy.util import EarlyStop
 from tqdm import tqdm
 class QuaNetTrainer(BaseQuantifier):
@ -29,7 +28,7 @@ class QuaNetTrainer(BaseQuantifier):
    >>>
    >>> # load the kindle dataset as text, and convert words to numerical indexes
    >>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
-    >>> qp.domains.preprocessing.index(dataset, min_df=5, inplace=True)
+    >>> qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
    >>>
    >>> # the text classifier is a CNN trained by NeuralClassifierTrainer
    >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
@ -264,19 +263,15 @@ class QuaNetTrainer(BaseQuantifier):
                                     f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}')
    def get_params(self, deep=True):
-        classifier_params = self.classifier.get_params()
+        return {**self.classifier.get_params(), **self.quanet_params}
        classifier_params = {'classifier__'+k:v for k,v in classifier_params.items()}
        return {**classifier_params, **self.quanet_params}
    def set_params(self, **parameters):
        learner_params = {}
        for key, val in parameters.items():
            if key in self.quanet_params:
                self.quanet_params[key] = val
            elif key.startswith('classifier__'):
                learner_params[key.replace('classifier__', '')] = val
            else:
-                raise ValueError('unknown parameter ', key)
+                learner_params[key] = val
        self.classifier.set_params(**learner_params)
    def __check_params_colision(self, quanet_params, learner_params):
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@ -56,7 +56,7 @@ class GridSearchQ(BaseQuantifier):
    def _sout(self, msg):
        if self.verbose:
-            print(f'[{self.__class__.__name__}:{self.model.__class__.__name__}]: {msg}')
+            print(f'[{self.__class__.__name__}]: {msg}')
    def __check_error(self, error):
        if error in qp.error.QUANTIFICATION_ERROR:
--- a/quapy/plot.py
+++ b/quapy/plot.py
@ -383,9 +383,6 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
    # x_error function) and 'y' is the estim-test shift (computed as according to y_error)
    data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
    if method_order is None:
        method_order = method_names
    if binning == 'isomerous':
        # take bins containing the same amount of examples
        tr_test_drifts = np.concatenate([data[m]['x'] for m in method_order])
--- a/setup.py
+++ b/setup.py
@ -89,6 +89,8 @@ setup(
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3 :: Only',
@ -111,7 +113,7 @@ setup(
    #
    packages=find_packages(include=['quapy', 'quapy.*']),  # Required
-    python_requires='>=3.8, <4',
+    python_requires='>=3.6, <4',
    install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention'],