Compare commits

..

No commits in common. "ca25f1d601f5a3fb69e486a13332403a09ebea9f" and "140ab3bfc9a7cf398e49e601f1c1a16a9fdb8e5c" have entirely different histories.

24 changed files with 208 additions and 1027 deletions

View File

@ -1,23 +0,0 @@
name: Pylint
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py')

View File

@ -1,6 +1,3 @@
ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now
works with protocols it would need to know the validation set in order to pass something like
"protocol: APP(val, etc.)"
sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.) clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.)
make truly parallel the GridSearchQ make truly parallel the GridSearchQ

View File

@ -224,6 +224,8 @@
<li><a href="quapy.html#quapy.util.create_parent_dir">create_parent_dir() (in module quapy.util)</a> <li><a href="quapy.html#quapy.util.create_parent_dir">create_parent_dir() (in module quapy.util)</a>
</li> </li>
<li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions">cross_generate_predictions() (in module quapy.method.aggregative)</a> <li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions">cross_generate_predictions() (in module quapy.method.aggregative)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions_depr">cross_generate_predictions_depr() (in module quapy.method.aggregative)</a>
</li> </li>
<li><a href="quapy.html#quapy.model_selection.cross_val_predict">cross_val_predict() (in module quapy.model_selection)</a> <li><a href="quapy.html#quapy.model_selection.cross_val_predict">cross_val_predict() (in module quapy.model_selection)</a>
</li> </li>

Binary file not shown.

View File

@ -316,14 +316,11 @@ fitting <cite>TruncatedSVD</cite> and then <cite>LogisticRegression</cite> on th
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">deep</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition"></a></dt>
<dd><p>Get hyper-parameters for this estimator.</p> <dd><p>Get hyper-parameters for this estimator.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>deep</strong> compatibility with sklearn</p> <dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a dictionary with parameter names mapped to their values</p>
</dd> </dd>
</dl> </dl>
</dd></dl> </dd></dl>
@ -527,7 +524,7 @@ dimensionality of the embedding</p>
<dl class="py class"> <dl class="py class">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cuda'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cpu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Trains a neural network for text classification.</p> <p>Trains a neural network for text classification.</p>
<dl class="field-list simple"> <dl class="field-list simple">

View File

@ -447,8 +447,8 @@ index.</p>
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition"></a></dt>
<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the <dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
prevalence values are not specified, then returns the index of a uniform sampling. prevalence values are not specified, then returns the index of a uniform sampling.
For each class, the sampling is drawn with replacement if the requested prevalence is larger than For each class, the sampling is drawn without replacement if the requested prevalence is larger than
the actual prevalence of the class, or without replacement otherwise.</p> the actual prevalence of the class, or with replacement otherwise.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
@ -534,7 +534,7 @@ values for each class)</p>
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling"> <dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
<span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition"></a></dt>
<dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn <dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement without replacement if the requested size is greater than the number of instances, or with replacement
otherwise.</p> otherwise.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -553,7 +553,7 @@ otherwise.</p>
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index"> <dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
<span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition"></a></dt>
<dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn <dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement without replacement if the requested size is greater than the number of instances, or with replacement
otherwise.</p> otherwise.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>

View File

@ -61,7 +61,6 @@
</section> </section>
<section id="module-quapy.error"> <section id="module-quapy.error">
<span id="quapy-error"></span><h2>quapy.error<a class="headerlink" href="#module-quapy.error" title="Permalink to this heading"></a></h2> <span id="quapy-error"></span><h2>quapy.error<a class="headerlink" href="#module-quapy.error" title="Permalink to this heading"></a></h2>
<p>Implementation of error measures used for quantification</p>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.absolute_error"> <dt class="sig sig-object py" id="quapy.error.absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition"></a></dt>
@ -87,9 +86,8 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.acc_error"> <dt class="sig sig-object py" id="quapy.error.acc_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acc_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acc_error" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acc_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acc_error" title="Permalink to this definition"></a></dt>
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
for true positives, false positives, false negatives, and true negatives,
respectively</p> respectively</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -107,9 +105,8 @@ respectively</p>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.acce"> <dt class="sig sig-object py" id="quapy.error.acce">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acce" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acce" title="Permalink to this definition"></a></dt>
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
for true positives, false positives, false negatives, and true negatives,
respectively</p> respectively</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -149,12 +146,10 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.f1_error"> <dt class="sig sig-object py" id="quapy.error.f1_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1_error" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1_error" title="Permalink to this definition"></a></dt>
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
for true positives, false positives, and false negatives, respectively. <cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
and then averaged.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
@ -171,12 +166,10 @@ and then averaged.</p>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.f1e"> <dt class="sig sig-object py" id="quapy.error.f1e">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1e</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1e" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1e</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1e" title="Permalink to this definition"></a></dt>
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
for true positives, false positives, and false negatives, respectively. <cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
and then averaged.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
@ -193,8 +186,7 @@ and then averaged.</p>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.from_name"> <dt class="sig sig-object py" id="quapy.error.from_name">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">from_name</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">err_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.from_name" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">from_name</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">err_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.from_name" title="Permalink to this definition"></a></dt>
<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite> <dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite> will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>err_name</strong> string, the error name</p> <dd class="field-odd"><p><strong>err_name</strong> string, the error name</p>
@ -207,13 +199,11 @@ will return function <a class="reference internal" href="#quapy.error.mae" title
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.kld"> <dt class="sig sig-object py" id="quapy.error.kld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition"></a></dt>
<dd><dl class="simple"> <dd><dl class="simple">
<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> <dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
is computed as <span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>, where
<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})= <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p> The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd> </dd>
</dl> </dl>
@ -222,10 +212,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain <li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite> will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
(which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -242,8 +231,7 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
prevalence values</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -260,8 +248,7 @@ prevalence values</p></li>
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
prevalence values</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -272,21 +259,17 @@ prevalence values</p></li>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mean_relative_absolute_error"> <dt class="sig sig-object py" id="quapy.error.mean_relative_absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition"></a></dt>
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across <dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
prevalence values</p></li> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
<li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -298,20 +281,16 @@ the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set befo
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mkld"> <dt class="sig sig-object py" id="quapy.error.mkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mkld" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mkld" title="Permalink to this definition"></a></dt>
<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the <dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the sample pairs.
sample pairs. The distributions are smoothed using the <cite>eps</cite> factor The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
(see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
prevalence values</p></li> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
<li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
(which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -323,19 +302,16 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mnkld"> <dt class="sig sig-object py" id="quapy.error.mnkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mnkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mnkld" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mnkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mnkld" title="Permalink to this definition"></a></dt>
<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>) <dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>) across the sample pairs.
across the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
(see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
prevalence values</p></li> <li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
<li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions contain is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
(which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -346,21 +322,17 @@ If <cite>eps=None</cite>, the sample size will be taken from the environment var
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mrae"> <dt class="sig sig-object py" id="quapy.error.mrae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition"></a></dt>
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across <dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted <li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
prevalence values</p></li> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
<li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -376,10 +348,8 @@ the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set befo
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the <li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
true prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the
predicted prevalence values</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -390,12 +360,10 @@ predicted prevalence values</p></li>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.nkld"> <dt class="sig sig-object py" id="quapy.error.nkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition"></a></dt>
<dd><dl class="simple"> <dd><dl class="simple">
<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
<span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as is computed as <span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>, where
math:<cite>NKLD(p,hat{p}) = 2frac{e^{KLD(p,hat{p})}}{e^{KLD(p,hat{p})}+1}-1</cite>,
where
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest. <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p> The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd> </dd>
@ -405,10 +373,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions <li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
contain zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -419,12 +386,10 @@ size. If <cite>eps=None</cite>, the sample size will be taken from the environme
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.rae"> <dt class="sig sig-object py" id="quapy.error.rae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition"></a></dt>
<dd><dl class="simple"> <dd><dl class="simple">
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> <dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
is computed as <span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest. where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p> The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd> </dd>
@ -434,10 +399,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution <li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -448,12 +412,10 @@ sample size. If <cite>eps=None</cite>, the sample size will be taken from the en
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.relative_absolute_error"> <dt class="sig sig-object py" id="quapy.error.relative_absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition"></a></dt>
<dd><dl class="simple"> <dd><dl class="simple">
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> <dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
is computed as <span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest. where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p> The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd> </dd>
@ -463,10 +425,9 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li> <li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li> <li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution <li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul> </ul>
</dd> </dd>
<dt class="field-even">Returns<span class="colon">:</span></dt> <dt class="field-even">Returns<span class="colon">:</span></dt>
@ -477,11 +438,10 @@ sample size. If <cite>eps=None</cite>, the sample size will be taken from the en
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.error.se"> <dt class="sig sig-object py" id="quapy.error.se">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition"></a></dt>
<dd><dl class="simple"> <dd><dl class="simple">
<dt>Computes the squared error between the two prevalence vectors.</dt><dd><p>Squared error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as <dt>Computes the squared error between the two prevalence vectors.</dt><dd><p>Squared error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>, <span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>, where
where
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p> <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
</dd> </dd>
</dl> </dl>
@ -502,8 +462,7 @@ where
<dt class="sig sig-object py" id="quapy.error.smooth"> <dt class="sig sig-object py" id="quapy.error.smooth">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition"></a></dt>
<dd><p>Smooths a prevalence distribution with <span class="math notranslate nohighlight">\(\epsilon\)</span> (<cite>eps</cite>) as: <dd><p>Smooths a prevalence distribution with <span class="math notranslate nohighlight">\(\epsilon\)</span> (<cite>eps</cite>) as:
<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+ <span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt> <dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple"> <dd class="field-odd"><ul class="simple">
@ -642,7 +601,7 @@ convenient or not. Set to False to deactivate.</p></li>
</div> </div>
<span class="target" id="module-quapy.protocol"></span><dl class="py class"> <span class="target" id="module-quapy.protocol"></span><dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.APP"> <dt class="sig sig-object py" id="quapy.protocol.APP">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sanity_check</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p> <dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
<p>Implementation of the artificial prevalence protocol (APP). <p>Implementation of the artificial prevalence protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g., The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
@ -662,8 +621,6 @@ grid (default is 21)</p></li>
<li><p><strong>smooth_limits_epsilon</strong> the quantity to add and subtract to the limits 0 and 1</p></li> <li><p><strong>smooth_limits_epsilon</strong> the quantity to add and subtract to the limits 0 and 1</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples <li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)</p></li> will be the same every time the protocol is called)</p></li>
<li><p><strong>sanity_check</strong> int, raises an exception warning the user that the number of examples to be generated exceed
this number; set to None for skipping this check</p></li>
<li><p><strong>return_type</strong> set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or <li><p><strong>return_type</strong> set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
to “labelled_collection” to get instead instances of LabelledCollection</p></li> to “labelled_collection” to get instead instances of LabelledCollection</p></li>
</ul> </ul>
@ -1862,7 +1819,6 @@ this function is invoked, it loads the pickled resource. Example:</p>
</section> </section>
<section id="module-quapy"> <section id="module-quapy">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy" title="Permalink to this heading"></a></h2> <span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy" title="Permalink to this heading"></a></h2>
<p>QuaPy module for quantification</p>
</section> </section>
</section> </section>

View File

@ -1064,6 +1064,11 @@ validation data, or as an integer, indicating that the misclassification rates s
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.method.aggregative.cross_generate_predictions_depr">
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions_depr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions_depr" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function"> <dl class="py function">
<dt class="sig sig-object py" id="quapy.method.aggregative.newELM"> <dt class="sig sig-object py" id="quapy.method.aggregative.newELM">
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">newELM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'01'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.newELM" title="Permalink to this definition"></a></dt> <span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">newELM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'01'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.newELM" title="Permalink to this definition"></a></dt>

File diff suppressed because one or more lines are too long

View File

@ -1,152 +0,0 @@
from copy import deepcopy
import quapy as qp
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from quapy.classification.methods import LowRankLogisticRegression
from quapy.method.meta import QuaNet
from quapy.protocol import APP
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, HDy, newSVMAE
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil
N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1
qp.environ['SAMPLE_SIZE'] = 100
def newLR():
return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
def calibratedLR():
return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1))
__C_range = np.logspace(-3, 3, 7)
lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']}
svmperf_params = {'classifier__C': __C_range}
def quantification_models():
yield 'cc', CC(newLR()), lr_params
yield 'acc', ACC(newLR()), lr_params
yield 'pcc', PCC(newLR()), lr_params
yield 'pacc', PACC(newLR()), lr_params
yield 'MAX', MAX(newLR()), lr_params
yield 'MS', MS(newLR()), lr_params
yield 'MS2', MS2(newLR()), lr_params
yield 'sldc', EMQ(newLR(), recalib='platt'), lr_params
yield 'svmmae', newSVMAE(), svmperf_params
yield 'hdy', HDy(newLR()), lr_params
def quantification_cuda_models():
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Running QuaNet in {device}')
learner = LowRankLogisticRegression()
yield 'quanet', QuaNet(learner, checkpointdir=args.checkpointdir, device=device), lr_params
def evaluate_experiment(true_prevalences, estim_prevalences):
print('\nEvaluation Metrics:\n' + '=' * 22)
for eval_measure in [qp.error.mae, qp.error.mrae]:
err = eval_measure(true_prevalences, estim_prevalences)
print(f'\t{eval_measure.__name__}={err:.4f}')
print()
def result_path(path, dataset_name, model_name, run, optim_loss):
return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl')
def is_already_computed(dataset_name, model_name, run, optim_loss):
return os.path.exists(result_path(args.results, dataset_name, model_name, run, optim_loss))
def save_results(dataset_name, model_name, run, optim_loss, *results):
rpath = result_path(args.results, dataset_name, model_name, run, optim_loss)
qp.util.create_parent_dir(rpath)
with open(rpath, 'wb') as foo:
pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
def run(experiment):
optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return
collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=1)):
if is_already_computed(dataset_name, model_name, run=run, optim_loss=optim_loss):
print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5 already computed.')
continue
print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={run+1}/5')
# model selection (hyperparameter optimization for a quantification-oriented loss)
train, test = data.train_test
train, val = train.split_stratified()
if hyperparams is not None:
model_selection = qp.model_selection.GridSearchQ(
deepcopy(model),
param_grid=hyperparams,
protocol=APP(val, n_prevalences=21, repeats=25),
error=optim_loss,
refit=True,
timeout=60*60,
verbose=True
)
model_selection.fit(data.training)
model = model_selection.best_model()
best_params = model_selection.best_params_
else:
model.fit(data.training)
best_params = {}
# model evaluation
true_prevalences, estim_prevalences = qp.evaluation.prediction(
model,
protocol=APP(test, n_prevalences=21, repeats=100)
)
test_true_prevalence = data.test.prevalence()
evaluate_experiment(true_prevalences, estim_prevalences)
save_results(dataset_name, model_name, run, optim_loss,
true_prevalences, estim_prevalences,
data.training.prevalence(), test_true_prevalence,
best_params)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str,
help='path to the directory where to store the results')
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification',
help='path to the directory with svmperf')
parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
help='path to the directory where to dump QuaNet checkpoints')
args = parser.parse_args()
print(f'Result folder: {args.results}')
np.random.seed(0)
qp.environ['SVMPERF_HOME'] = args.svmperfpath
optim_losses = ['mae']
datasets = qp.datasets.UCI_DATASETS
models = quantification_models()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
models = quantification_cuda_models()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)
shutil.rmtree(args.checkpointdir, ignore_errors=True)

View File

@ -1,244 +0,0 @@
from scipy.sparse import csc_matrix, csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
import numpy as np
from joblib import Parallel, delayed
import sklearn
import math
from scipy.stats import t
class ContTable:
def __init__(self, tp=0, tn=0, fp=0, fn=0):
self.tp=tp
self.tn=tn
self.fp=fp
self.fn=fn
def get_d(self): return self.tp + self.tn + self.fp + self.fn
def get_c(self): return self.tp + self.fn
def get_not_c(self): return self.tn + self.fp
def get_f(self): return self.tp + self.fp
def get_not_f(self): return self.tn + self.fn
def p_c(self): return (1.0*self.get_c())/self.get_d()
def p_not_c(self): return 1.0-self.p_c()
def p_f(self): return (1.0*self.get_f())/self.get_d()
def p_not_f(self): return 1.0-self.p_f()
def p_tp(self): return (1.0*self.tp) / self.get_d()
def p_tn(self): return (1.0*self.tn) / self.get_d()
def p_fp(self): return (1.0*self.fp) / self.get_d()
def p_fn(self): return (1.0*self.fn) / self.get_d()
def tpr(self):
c = 1.0*self.get_c()
return self.tp / c if c > 0.0 else 0.0
def fpr(self):
_c = 1.0*self.get_not_c()
return self.fp / _c if _c > 0.0 else 0.0
def __ig_factor(p_tc, p_t, p_c):
den = p_t * p_c
if den != 0.0 and p_tc != 0:
return p_tc * math.log(p_tc / den, 2)
else:
return 0.0
def information_gain(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \
__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) +\
__ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \
__ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
def squared_information_gain(cell):
return information_gain(cell)**2
def posneg_information_gain(cell):
ig = information_gain(cell)
if cell.tpr() < cell.fpr():
return -ig
else:
return ig
def pos_information_gain(cell):
if cell.tpr() < cell.fpr():
return 0
else:
return information_gain(cell)
def pointwise_mutual_information(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
def gss(cell):
return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn()
def chi_square(cell):
den = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
if den==0.0: return 0.0
num = gss(cell)**2
return num / den
def conf_interval(xt, n):
if n>30:
z2 = 3.84145882069 # norm.ppf(0.5+0.95/2.0)**2
else:
z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2
p = (xt + 0.5 * z2) / (n + z2)
amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2))
return p, amplitude
def strength(minPosRelFreq, minPos, maxNeg):
if minPos > maxNeg:
return math.log(2.0 * minPosRelFreq, 2.0)
else:
return 0.0
#set cancel_features=True to allow some features to be weighted as 0 (as in the original article)
#however, for some extremely imbalanced dataset caused all documents to be 0
def conf_weight(cell, cancel_features=False):
c = cell.get_c()
not_c = cell.get_not_c()
tp = cell.tp
fp = cell.fp
pos_p, pos_amp = conf_interval(tp, c)
neg_p, neg_amp = conf_interval(fp, not_c)
min_pos = pos_p-pos_amp
max_neg = neg_p+neg_amp
den = (min_pos + max_neg)
minpos_relfreq = min_pos / (den if den != 0 else 1)
str_tplus = strength(minpos_relfreq, min_pos, max_neg);
if str_tplus == 0 and not cancel_features:
return 1e-20
return str_tplus;
def get_tsr_matrix(cell_matrix, tsr_score_funtion):
nC = len(cell_matrix)
nF = len(cell_matrix[0])
tsr_matrix = [[tsr_score_funtion(cell_matrix[c,f]) for f in range(nF)] for c in range(nC)]
return np.array(tsr_matrix)
def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
tp_ = len(positive_document_indexes & feature_document_indexes)
fp_ = len(feature_document_indexes - positive_document_indexes)
fn_ = len(positive_document_indexes - feature_document_indexes)
tn_ = nD - (tp_ + fp_ + fn_)
return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_)
def category_tables(feature_sets, category_sets, c, nD, nF):
return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)]
def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
"""
Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c.
Efficiency O(nF x nC x log(S)) where S is the sparse factor
"""
nD, nF = coocurrence_matrix.shape
nD2, nC = label_matrix.shape
if nD != nD2:
raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
(coocurrence_matrix.shape,label_matrix.shape))
def nonzero_set(matrix, col):
return set(matrix[:, col].nonzero()[0])
if isinstance(coocurrence_matrix, csr_matrix):
coocurrence_matrix = csc_matrix(coocurrence_matrix)
feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)]
category_sets = [nonzero_set(label_matrix, c) for c in range(nC)]
cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC))
return np.array(cell_matrix)
class TSRweighting(BaseEstimator,TransformerMixin):
"""
Supervised Term Weighting function based on any Term Selection Reduction (TSR) function (e.g., information gain,
chi-square, etc.) or, more generally, on any function that could be computed on the 4-cell contingency table for
each category-feature pair.
The supervised_4cell_matrix (a CxF matrix containing the 4-cell contingency tables
for each category-feature pair) can be pre-computed (e.g., during the feature selection phase) and passed as an
argument.
When C>1, i.e., in multiclass scenarios, a global_policy is used in order to determine a single feature-score which
informs about its relevance. Accepted policies include "max" (takes the max score across categories), "ave" and "wave"
(take the average, or weighted average, across all categories -- weights correspond to the class prevalence), and "sum"
(which sums all category scores).
"""
def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
if global_policy not in ['max', 'ave', 'wave', 'sum']: raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
self.tsr_function = tsr_function
self.global_policy = global_policy
self.supervised_4cell_matrix = supervised_4cell_matrix
self.sublinear_tf=sublinear_tf
self.norm=norm
self.min_df = min_df
self.n_jobs=n_jobs
def fit(self, X, y):
self.count_vectorizer = CountVectorizer(min_df=self.min_df)
X = self.count_vectorizer.fit_transform(X)
self.tf_vectorizer = TfidfTransformer(
norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf).fit(X)
if len(y.shape) == 1:
y = np.expand_dims(y, axis=1)
nD, nC = y.shape
nF = len(self.tf_vectorizer.get_feature_names_out())
if self.supervised_4cell_matrix is None:
self.supervised_4cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
else:
if self.supervised_4cell_matrix.shape != (nC, nF): raise ValueError("Shape of supervised information matrix is inconsistent with X and y")
tsr_matrix = get_tsr_matrix(self.supervised_4cell_matrix, self.tsr_function)
if self.global_policy == 'ave':
self.global_tsr_vector = np.average(tsr_matrix, axis=0)
elif self.global_policy == 'wave':
category_prevalences = [sum(y[:,c])*1.0/nD for c in range(nC)]
self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
elif self.global_policy == 'sum':
self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
elif self.global_policy == 'max':
self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
return self
def fit_transform(self, X, y):
return self.fit(X,y).transform(X)
def transform(self, X):
if not hasattr(self, 'global_tsr_vector'): raise NameError('TSRweighting: transform method called before fit.')
X = self.count_vectorizer.transform(X)
tf_X = self.tf_vectorizer.transform(X).toarray()
weighted_X = np.multiply(tf_X, self.global_tsr_vector)
if self.norm is not None and self.norm!='none':
weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
return csr_matrix(weighted_X)

View File

View File

@ -1,148 +0,0 @@
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
import quapy as qp
from data import LabelledCollection
import numpy as np
from laboratory.custom_vectorizers import *
from protocol import APP
from quapy.method.aggregative import _get_divergence, HDy, DistributionMatching
from quapy.method.base import BaseQuantifier
from scipy import optimize
import pandas as pd
# TODO: explore the bernoulli (term presence/absence) variant
# TODO: explore the multinomial (term frequency) variant
# TODO: explore the multinomial + length normalization variant
# TODO: consolidate the TSR-variant (e.g., using information gain) variant;
# - works better with the idf?
# - works better with length normalization?
# - etc
class DxS(BaseQuantifier):
def __init__(self, vectorizer=None, divergence='topsoe'):
self.vectorizer = vectorizer
self.divergence = divergence
# def __as_distribution(self, instances):
# return np.asarray(instances.sum(axis=0) / instances.sum()).flatten()
def __as_distribution(self, instances):
dist = instances.sum(axis=0) / instances.sum()
return np.asarray(dist).flatten()
def fit(self, data: LabelledCollection):
text_instances, labels = data.Xy
if self.vectorizer is not None:
text_instances = self.vectorizer.fit_transform(text_instances, y=labels)
distributions = []
for class_i in data.classes_:
distributions.append(self.__as_distribution(text_instances[labels == class_i]))
self.validation_distribution = np.asarray(distributions)
return self
def quantify(self, text_instances):
if self.vectorizer is not None:
text_instances = self.vectorizer.transform(text_instances)
test_distribution = self.__as_distribution(text_instances)
divergence = _get_divergence(self.divergence)
n_classes, n_feats = self.validation_distribution.shape
def match(prev):
prev = np.expand_dims(prev, axis=0)
mixture_distribution = (prev @ self.validation_distribution).flatten()
return divergence(test_distribution, mixture_distribution)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 250
qp.environ['N_JOBS'] = -1
min_df = 10
# dataset = 'imdb'
repeats = 10
error = 'mae'
div = 'HD'
# generates tuples (dataset, method, method_name)
# (the dataset is needed for methods that process the dataset differently)
def gen_methods():
for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
data = qp.datasets.fetch_reviews(dataset, tfidf=False)
bernoulli_vectorizer = CountVectorizer(min_df=min_df, binary=True)
dxs = DxS(divergence=div, vectorizer=bernoulli_vectorizer)
yield data, dxs, 'DxS-Bernoulli'
multinomial_vectorizer = CountVectorizer(min_df=min_df, binary=False)
dxs = DxS(divergence=div, vectorizer=multinomial_vectorizer)
yield data, dxs, 'DxS-multinomial'
tf_vectorizer = TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=tf_vectorizer)
yield data, dxs, 'DxS-TF'
logtf_vectorizer = TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=logtf_vectorizer)
yield data, dxs, 'DxS-logTF'
tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)
dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
yield data, dxs, 'DxS-TFIDF'
tfidf_vectorizer = TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')
dxs = DxS(divergence=div, vectorizer=tfidf_vectorizer)
yield data, dxs, 'DxS-TFIDF-l2'
tsr_vectorizer = TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')
dxs = DxS(divergence=div, vectorizer=tsr_vectorizer)
yield data, dxs, 'DxS-TFTSR-l2'
data = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
hdy = HDy(LogisticRegression())
yield data, hdy, 'HDy'
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=5)
yield data, dm, 'DM-5b'
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=10)
yield data, dm, 'DM-10b'
result_path = 'results.csv'
with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for data, quantifier, quant_name in gen_methods():
quantifier.fit(data.training)
report = qp.evaluation.evaluation_report(quantifier, APP(data.test, repeats=repeats), error_metrics=['mae','mrae'], verbose=True)
means = report.mean()
csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
df = pd.read_csv(result_path, sep='\t')
# print(df)
pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
print(pv)

View File

@ -1,168 +0,0 @@
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.neighbors import KernelDensity
import quapy as qp
from data import LabelledCollection
from protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
DistributionMatching, _get_divergence
import scipy
from scipy import optimize
class KDEy(AggregativeProbabilisticQuantifier):
BANDWIDTH_METHOD = ['auto', 'scott', 'silverman']
ENGINE = ['scipy', 'sklearn']
def __init__(self, classifier: BaseEstimator, val_split=0.4, divergence: Union[str, Callable]='HD',
bandwidth_method='scott', engine='sklearn', n_jobs=None):
self.classifier = classifier
self.val_split = val_split
self.divergence = divergence
self.bandwidth_method = bandwidth_method
self.engine = engine
self.n_jobs = n_jobs
assert bandwidth_method in KDEy.BANDWIDTH_METHOD, f'unknown bandwidth_method, valid ones are {KDEy.BANDWIDTH_METHOD}'
assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'
def get_kde(self, posteriors):
if self.engine == 'scipy':
# scipy treats columns as datapoints, and need the datapoints not to lie in a lower-dimensional subspace, which
# requires removing the last dimension which is constrained
posteriors = posteriors[:,:-1].T
kde = scipy.stats.gaussian_kde(posteriors)
kde.set_bandwidth(self.bandwidth_method)
elif self.engine == 'sklearn':
kde = KernelDensity(bandwidth=self.bandwidth_method).fit(posteriors)
return kde
def pdf(self, kde, posteriors):
if self.engine == 'scipy':
return kde(posteriors[:,:-1].T)
elif self.engine == 'sklearn':
return np.exp(kde.score_samples(posteriors))
def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
"""
Trains the classifier (if requested) and generates the validation distributions out of the training data.
The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
channels (a channel is a description, in form of a histogram, of a specific class -- there are as many channels
as classes, although in the binary case one can use only one channel, since the other one is constrained),
and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
is the fraction of instances with a value in the `k`-th bin.
:param data: the training set
:param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
:param val_split: either a float in (0,1) indicating the proportion of training instances to use for
validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV
to estimate the parameters
"""
if val_split is None:
val_split = self.val_split
self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
)
self.val_densities = [self.get_kde(posteriors[y == cat]) for cat in range(data.n_classes)]
self.val_posteriors = posteriors
return self
def val_pdf(self, prev):
"""
Returns a function that computes the mixture model with the given prev as mixture factor
:param prev: a prevalence vector, ndarray
:return: a function implementing the validation distribution with fixed mixture factor
"""
return lambda posteriors: sum(prev_i * self.pdf(kde_i, posteriors) for kde_i, prev_i in zip(self.val_densities, prev))
def aggregate(self, posteriors: np.ndarray):
"""
Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution
(the mixture) that best matches the test distribution, in terms of the divergence measure of choice.
In the multiclass case, with `n` the number of classes, the test and mixture distributions contain
`n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed
independently. The matching is computed as an average of the divergence across all channels.
:param instances: instances in the sample
:return: a vector of class prevalence estimates
"""
test_density = self.get_kde(posteriors)
# val_test_posteriors = np.concatenate([self.val_posteriors, posteriors])
test_likelihood = self.pdf(test_density, posteriors)
divergence = _get_divergence(self.divergence)
n_classes = len(self.val_densities)
def match(prev):
val_pdf = self.val_pdf(prev)
val_likelihood = val_pdf(posteriors)
return divergence(val_likelihood, test_likelihood)
# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
div = 'HD'
# generates tuples (dataset, method, method_name)
# (the dataset is needed for methods that process the dataset differently)
def gen_methods():
for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)
# kdey = KDEy(LogisticRegression(), divergence=div, bandwidth_method='scott')
# yield data, kdey, f'KDEy-{div}-scott'
kdey = KDEy(LogisticRegression(), divergence=div, bandwidth_method='silverman', engine='sklearn')
yield data, kdey, f'KDEy-{div}-silverman'
dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=5)
yield data, dm, f'DM-5b-{div}'
# dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=10)
# yield data, dm, f'DM-10b-{div}'
result_path = 'results_kdey.csv'
with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for data, quantifier, quant_name in gen_methods():
quantifier.fit(data.training)
protocol = UPP(data.test, repeats=100)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae','mrae'], verbose=True)
means = report.mean()
csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
csv.flush()
df = pd.read_csv(result_path, sep='\t')
# print(df)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
print(pv)

View File

@ -1,7 +1,6 @@
"""QuaPy module for quantification"""
from quapy.data import datasets
from . import error from . import error
from . import data from . import data
from quapy.data import datasets
from . import functional from . import functional
# from . import method # from . import method
from . import evaluation from . import evaluation
@ -26,8 +25,7 @@ environ = {
def _get_njobs(n_jobs): def _get_njobs(n_jobs):
""" """
If `n_jobs` is None, then it returns `environ['N_JOBS']`; If `n_jobs` is None, then it returns `environ['N_JOBS']`; if otherwise, returns `n_jobs`.
if otherwise, returns `n_jobs`.
:param n_jobs: the number of `n_jobs` or None if not specified :param n_jobs: the number of `n_jobs` or None if not specified
:return: int :return: int
@ -37,8 +35,7 @@ def _get_njobs(n_jobs):
def _get_sample_size(sample_size): def _get_sample_size(sample_size):
""" """
If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; if otherwise, returns `sample_size`.
if otherwise, returns `sample_size`.
If none of these are set, then a ValueError exception is raised. If none of these are set, then a ValueError exception is raised.
:param sample_size: integer or None :param sample_size: integer or None
@ -48,3 +45,6 @@ def _get_sample_size(sample_size):
if sample_size is None: if sample_size is None:
raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified') raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified')
return sample_size return sample_size

View File

@ -19,7 +19,7 @@ class LowRankLogisticRegression(BaseEstimator):
def __init__(self, n_components=100, **kwargs): def __init__(self, n_components=100, **kwargs):
self.n_components = n_components self.n_components = n_components
self.classifier = LogisticRegression(**kwargs) self.learner = LogisticRegression(**kwargs)
def get_params(self): def get_params(self):
""" """
@ -28,7 +28,7 @@ class LowRankLogisticRegression(BaseEstimator):
:return: a dictionary with parameter names mapped to their values :return: a dictionary with parameter names mapped to their values
""" """
params = {'n_components': self.n_components} params = {'n_components': self.n_components}
params.update(self.classifier.get_params()) params.update(self.learner.get_params())
return params return params
def set_params(self, **params): def set_params(self, **params):
@ -43,7 +43,7 @@ class LowRankLogisticRegression(BaseEstimator):
if 'n_components' in params_: if 'n_components' in params_:
self.n_components = params_['n_components'] self.n_components = params_['n_components']
del params_['n_components'] del params_['n_components']
self.classifier.set_params(**params_) self.learner.set_params(**params_)
def fit(self, X, y): def fit(self, X, y):
""" """
@ -59,8 +59,8 @@ class LowRankLogisticRegression(BaseEstimator):
if nF > self.n_components: if nF > self.n_components:
self.pca = TruncatedSVD(self.n_components).fit(X) self.pca = TruncatedSVD(self.n_components).fit(X)
X = self.transform(X) X = self.transform(X)
self.classifier.fit(X, y) self.learner.fit(X, y)
self.classes_ = self.classifier.classes_ self.classes_ = self.learner.classes_
return self return self
def predict(self, X): def predict(self, X):
@ -72,7 +72,7 @@ class LowRankLogisticRegression(BaseEstimator):
instances in `X` instances in `X`
""" """
X = self.transform(X) X = self.transform(X)
return self.classifier.predict(X) return self.learner.predict(X)
def predict_proba(self, X): def predict_proba(self, X):
""" """
@ -82,7 +82,7 @@ class LowRankLogisticRegression(BaseEstimator):
:return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
""" """
X = self.transform(X) X = self.transform(X)
return self.classifier.predict_proba(X) return self.learner.predict_proba(X)
def transform(self, X): def transform(self, X):
""" """

View File

@ -207,7 +207,7 @@ def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False
return Dataset(*data.split_stratified(1 - test_split, random_state=0)) return Dataset(*data.split_stratified(1 - test_split, random_state=0))
def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection: def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> Dataset:
""" """
Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in
`Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
@ -223,7 +223,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
>>> import quapy as qp >>> import quapy as qp
>>> collection = qp.datasets.fetch_UCILabelledCollection("yeast") >>> collection = qp.datasets.fetch_UCILabelledCollection("yeast")
>>> for data in qp.domains.Dataset.kFCV(collection, nfolds=5, nrepeats=2): >>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
>>> ... >>> ...
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS` The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
@ -233,7 +233,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
~/quay_data/ directory) ~/quay_data/ directory)
:param test_split: proportion of documents to be included in the test set. The rest conforms the training set :param test_split: proportion of documents to be included in the test set. The rest conforms the training set
:param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
:return: a :class:`quapy.data.base.LabelledCollection` instance :return: a :class:`quapy.data.base.Dataset` instance
""" """
assert dataset_name in UCI_DATASETS, \ assert dataset_name in UCI_DATASETS, \

View File

@ -1,13 +1,10 @@
"""Implementation of error measures used for quantification""" import quapy as qp
import numpy as np import numpy as np
from sklearn.metrics import f1_score from sklearn.metrics import f1_score
import quapy as qp
def from_name(err_name): def from_name(err_name):
"""Gets an error function from its name. E.g., `from_name("mae")` """Gets an error function from its name. E.g., `from_name("mae")` will return function :meth:`quapy.error.mae`
will return function :meth:`quapy.error.mae`
:param err_name: string, the error name :param err_name: string, the error name
:return: a callable implementing the requested error :return: a callable implementing the requested error
@ -18,12 +15,10 @@ def from_name(err_name):
def f1e(y_true, y_pred): def f1e(y_true, y_pred):
"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., """F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., :math:`1-F_1^M`,
:math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall, where :math:`F_1` is the harmonic mean of precision and recall, defined as :math:`\\frac{2tp}{2tp+fp+fn}`,
defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing with `tp`, `fp`, and `fn` standing for true positives, false positives, and false negatives, respectively.
for true positives, false positives, and false negatives, respectively. `Macro` averaging means the :math:`F_1` is computed for each category independently, and then averaged.
`Macro` averaging means the :math:`F_1` is computed for each category independently,
and then averaged.
:param y_true: array-like of true labels :param y_true: array-like of true labels
:param y_pred: array-like of predicted labels :param y_pred: array-like of predicted labels
@ -33,9 +28,8 @@ def f1e(y_true, y_pred):
def acce(y_true, y_pred): def acce(y_true, y_pred):
"""Computes the error in terms of 1-accuracy. The accuracy is computed as """Computes the error in terms of 1-accuracy. The accuracy is computed as :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with
:math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing `tp`, `fp`, `fn`, and `tn` standing for true positives, false positives, false negatives, and true negatives,
for true positives, false positives, false negatives, and true negatives,
respectively respectively
:param y_true: array-like of true labels :param y_true: array-like of true labels
@ -49,8 +43,7 @@ def mae(prevs, prevs_hat):
"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs. """Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
prevalence values
:return: mean absolute error :return: mean absolute error
""" """
return ae(prevs, prevs_hat).mean() return ae(prevs, prevs_hat).mean()
@ -59,7 +52,7 @@ def mae(prevs, prevs_hat):
def ae(prevs, prevs_hat): def ae(prevs, prevs_hat):
"""Computes the absolute error between the two prevalence vectors. """Computes the absolute error between the two prevalence vectors.
Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`, :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}|\\hat{p}(y)-p(y)|`,
where :math:`\\mathcal{Y}` are the classes of interest. where :math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
@ -73,153 +66,129 @@ def ae(prevs, prevs_hat):
def mse(prevs, prevs_hat): def mse(prevs, prevs_hat):
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs. """Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
true prevalence values :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the
predicted prevalence values
:return: mean squared error :return: mean squared error
""" """
return se(prevs, prevs_hat).mean() return se(prevs, prevs_hat).mean()
def se(prevs, prevs_hat): def se(p, p_hat):
"""Computes the squared error between the two prevalence vectors. """Computes the squared error between the two prevalence vectors.
Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`, :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}(\\hat{p}(y)-p(y))^2`, where
where
:math:`\\mathcal{Y}` are the classes of interest. :math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: absolute error :return: absolute error
""" """
return ((prevs_hat - prevs) ** 2).mean(axis=-1) return ((p_hat-p)**2).mean(axis=-1)
def mkld(prevs, prevs_hat, eps=None): def mkld(prevs, prevs_hat, eps=None):
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the sample pairs.
sample pairs. The distributions are smoothed using the `eps` factor The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
(see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
prevalence values :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
prevalence values is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
(which has thus to be set beforehand).
:return: mean Kullback-Leibler distribution :return: mean Kullback-Leibler distribution
""" """
return kld(prevs, prevs_hat, eps).mean() return kld(prevs, prevs_hat, eps).mean()
def kld(prevs, prevs_hat, eps=None): def kld(p, p_hat, eps=None):
"""Computes the Kullback-Leibler divergence between the two prevalence distributions. """Computes the Kullback-Leibler divergence between the two prevalence distributions.
Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` is computed as
is computed as :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`, where
:math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=
\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,
where :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
(which has thus to be set beforehand).
:return: Kullback-Leibler divergence between the two distributions
"""
eps = __check_eps(eps)
smooth_prevs = prevs + eps
smooth_prevs_hat = prevs_hat + eps
return (smooth_prevs*np.log(smooth_prevs/smooth_prevs_hat)).sum(axis=-1)
def mnkld(prevs, prevs_hat, eps=None):
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)
across the sample pairs. The distributions are smoothed using the `eps` factor
(see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
(which has thus to be set beforehand).
:return: mean Normalized Kullback-Leibler distribution
"""
return nkld(prevs, prevs_hat, eps).mean()
def nkld(prevs, prevs_hat, eps=None):
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and
:math:`\\hat{p}` is computed as
math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,
where
:math:`\\mathcal{Y}` are the classes of interest. :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`). The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
size. If `eps=None`, the sample size will be taken from the environment variable will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
`SAMPLE_SIZE` (which has thus to be set beforehand). :return: Kullback-Leibler divergence between the two distributions
"""
eps = __check_eps(eps)
sp = p+eps
sp_hat = p_hat + eps
return (sp*np.log(sp/sp_hat)).sum(axis=-1)
def mnkld(prevs, prevs_hat, eps=None):
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`) across the sample pairs.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean Normalized Kullback-Leibler distribution
"""
return nkld(prevs, prevs_hat, eps).mean()
def nkld(p, p_hat, eps=None):
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
is computed as :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`, where
:math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: Normalized Kullback-Leibler divergence between the two distributions :return: Normalized Kullback-Leibler divergence between the two distributions
""" """
ekld = np.exp(kld(prevs, prevs_hat, eps)) ekld = np.exp(kld(p, p_hat, eps))
return 2. * ekld / (1 + ekld) - 1. return 2. * ekld / (1 + ekld) - 1.
def mrae(prevs, prevs_hat, eps=None): def mrae(p, p_hat, eps=None):
"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across """Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across the sample pairs.
the sample pairs. The distributions are smoothed using the `eps` factor (see The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
prevalence values :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted :param eps: smoothing factor. `mrae` is not defined in cases in which the true distribution contains zeros; `eps`
prevalence values is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
:param eps: smoothing factor. `mrae` is not defined in cases in which the true will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,
with :math:`T` the sample size. If `eps=None`, the sample size will be taken from
the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean relative absolute error :return: mean relative absolute error
""" """
return rae(prevs, prevs_hat, eps).mean() return rae(p, p_hat, eps).mean()
def rae(prevs, prevs_hat, eps=None): def rae(p, p_hat, eps=None):
"""Computes the absolute relative error between the two prevalence vectors. """Computes the absolute relative error between the two prevalence vectors.
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
is computed as :math:`RAE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
:math:`RAE(p,\\hat{p})=
\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
where :math:`\\mathcal{Y}` are the classes of interest. where :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`). The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. `rae` is not defined in cases in which the true distribution :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution contains zeros; `eps`
contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
sample size. If `eps=None`, the sample size will be taken from the environment variable will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
`SAMPLE_SIZE` (which has thus to be set beforehand).
:return: relative absolute error :return: relative absolute error
""" """
eps = __check_eps(eps) eps = __check_eps(eps)
prevs = smooth(prevs, eps) p = smooth(p, eps)
prevs_hat = smooth(prevs_hat, eps) p_hat = smooth(p_hat, eps)
return (abs(prevs - prevs_hat) / prevs).mean(axis=-1) return (abs(p-p_hat)/p).mean(axis=-1)
def smooth(prevs, eps): def smooth(prevs, eps):
""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as: """ Smooths a prevalence distribution with :math:`\epsilon` (`eps`) as:
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+ :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param eps: smoothing factor :param eps: smoothing factor
@ -231,10 +200,12 @@ def smooth(prevs, eps):
def __check_eps(eps=None): def __check_eps(eps=None):
if eps is None: if eps is None:
import quapy as qp
sample_size = qp.environ['SAMPLE_SIZE'] sample_size = qp.environ['SAMPLE_SIZE']
if sample_size is None: if sample_size is None:
raise ValueError('eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set') raise ValueError('eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set')
eps = 1. / (2. * sample_size) else:
eps = 1. / (2. * sample_size)
return eps return eps
@ -246,8 +217,7 @@ CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR} QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE} QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH} QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH}
ERROR_NAMES = \ ERROR_NAMES = CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
f1_error = f1e f1_error = f1e
acc_error = acce acc_error = acce
@ -255,3 +225,4 @@ mean_absolute_error = mae
absolute_error = ae absolute_error = ae
mean_relative_absolute_error = mrae mean_relative_absolute_error = mrae
relative_absolute_error = rae relative_absolute_error = rae

View File

@ -444,28 +444,24 @@ class EMQ(AggregativeProbabilisticQuantifier):
def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None): def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None):
self.classifier = classifier self.classifier = classifier
self.non_calibrated = classifier
self.exact_train_prev = exact_train_prev self.exact_train_prev = exact_train_prev
self.recalib = recalib self.recalib = recalib
def fit(self, data: LabelledCollection, fit_classifier=True): def fit(self, data: LabelledCollection, fit_classifier=True):
if self.recalib is not None: if self.recalib is not None:
if self.recalib == 'nbvs': if self.recalib == 'nbvs':
self.classifier = NBVSCalibration(self.non_calibrated) self.classifier = NBVSCalibration(self.classifier)
elif self.recalib == 'bcts': elif self.recalib == 'bcts':
self.classifier = BCTSCalibration(self.non_calibrated) self.classifier = BCTSCalibration(self.classifier)
elif self.recalib == 'ts': elif self.recalib == 'ts':
self.classifier = TSCalibration(self.non_calibrated) self.classifier = TSCalibration(self.classifier)
elif self.recalib == 'vs': elif self.recalib == 'vs':
self.classifier = VSCalibration(self.non_calibrated) self.classifier = VSCalibration(self.classifier)
elif self.recalib == 'platt': elif self.recalib == 'platt':
self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False) self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False)
else: else:
raise ValueError('invalid param argument for recalibration method; available ones are ' raise ValueError('invalid param argument for recalibration method; available ones are '
'"nbvs", "bcts", "ts", and "vs".') '"nbvs", "bcts", "ts", and "vs".')
self.recalib = None
else:
self.classifier = self.non_calibrated
self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True) self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True)
if self.exact_train_prev: if self.exact_train_prev:
self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
@ -770,9 +766,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
""" """
Trains the classifier (if requested) and generates the validation distributions out of the training data. Trains the classifier (if requested) and generates the validation distributions out of the training data.
The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
channels (a channel is a description, in form of a histogram, of a specific class -- there are as many channels channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
as classes, although in the binary case one can use only one channel, since the other one is constrained),
and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]` distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
is the fraction of instances with a value in the `k`-th bin. is the fraction of instances with a value in the `k`-th bin.
@ -821,7 +815,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,)) uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
# solutions are bounded to those contained in the unit-simplex # solutions are bounded to those contained in the unit-simplex
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1] bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1 constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints) r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x return r.x

View File

@ -9,7 +9,6 @@ from torch.nn.functional import relu
from quapy.protocol import UPP from quapy.protocol import UPP
from quapy.method.aggregative import * from quapy.method.aggregative import *
from quapy.util import EarlyStop from quapy.util import EarlyStop
from tqdm import tqdm
class QuaNetTrainer(BaseQuantifier): class QuaNetTrainer(BaseQuantifier):
@ -29,7 +28,7 @@ class QuaNetTrainer(BaseQuantifier):
>>> >>>
>>> # load the kindle dataset as text, and convert words to numerical indexes >>> # load the kindle dataset as text, and convert words to numerical indexes
>>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True) >>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
>>> qp.domains.preprocessing.index(dataset, min_df=5, inplace=True) >>> qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
>>> >>>
>>> # the text classifier is a CNN trained by NeuralClassifierTrainer >>> # the text classifier is a CNN trained by NeuralClassifierTrainer
>>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
@ -264,19 +263,15 @@ class QuaNetTrainer(BaseQuantifier):
f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}') f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}')
def get_params(self, deep=True): def get_params(self, deep=True):
classifier_params = self.classifier.get_params() return {**self.classifier.get_params(), **self.quanet_params}
classifier_params = {'classifier__'+k:v for k,v in classifier_params.items()}
return {**classifier_params, **self.quanet_params}
def set_params(self, **parameters): def set_params(self, **parameters):
learner_params = {} learner_params = {}
for key, val in parameters.items(): for key, val in parameters.items():
if key in self.quanet_params: if key in self.quanet_params:
self.quanet_params[key] = val self.quanet_params[key] = val
elif key.startswith('classifier__'):
learner_params[key.replace('classifier__', '')] = val
else: else:
raise ValueError('unknown parameter ', key) learner_params[key] = val
self.classifier.set_params(**learner_params) self.classifier.set_params(**learner_params)
def __check_params_colision(self, quanet_params, learner_params): def __check_params_colision(self, quanet_params, learner_params):

View File

@ -56,7 +56,7 @@ class GridSearchQ(BaseQuantifier):
def _sout(self, msg): def _sout(self, msg):
if self.verbose: if self.verbose:
print(f'[{self.__class__.__name__}:{self.model.__class__.__name__}]: {msg}') print(f'[{self.__class__.__name__}]: {msg}')
def __check_error(self, error): def __check_error(self, error):
if error in qp.error.QUANTIFICATION_ERROR: if error in qp.error.QUANTIFICATION_ERROR:

View File

@ -383,9 +383,6 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
# x_error function) and 'y' is the estim-test shift (computed as according to y_error) # x_error function) and 'y' is the estim-test shift (computed as according to y_error)
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order) data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
if method_order is None:
method_order = method_names
if binning == 'isomerous': if binning == 'isomerous':
# take bins containing the same amount of examples # take bins containing the same amount of examples
tr_test_drifts = np.concatenate([data[m]['x'] for m in method_order]) tr_test_drifts = np.concatenate([data[m]['x'] for m in method_order])

View File

@ -89,6 +89,8 @@ setup(
'License :: OSI Approved :: BSD License', 'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3 :: Only',
@ -111,7 +113,7 @@ setup(
# #
packages=find_packages(include=['quapy', 'quapy.*']), # Required packages=find_packages(include=['quapy', 'quapy.*']), # Required
python_requires='>=3.8, <4', python_requires='>=3.6, <4',
install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention'], install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention'],