Compare commits
14 Commits
140ab3bfc9
...
ca25f1d601
| Author | SHA1 | Date |
|---|---|---|
|
|
ca25f1d601 | |
|
|
2df89c83e8 | |
|
|
1efe13c538 | |
|
|
763c008b6d | |
|
|
fa9d5ea243 | |
|
|
67906f6f2d | |
|
|
4904475d26 | |
|
|
1826d8a8dc | |
|
|
de93cce391 | |
|
|
d1e11f8a6b | |
|
|
d0706005d7 | |
|
|
368ee03fbc | |
|
|
e9d56e5801 | |
|
|
3779bb2123 |
|
|
@ -0,0 +1,23 @@
|
|||
name: Pylint
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pylint
|
||||
- name: Analysing the code with pylint
|
||||
run: |
|
||||
pylint $(git ls-files '*.py')
|
||||
3
TODO.txt
3
TODO.txt
|
|
@ -1,3 +1,6 @@
|
|||
ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now
|
||||
works with protocols, it would need to know the validation set in order to pass something like
|
||||
"protocol: APP(val, etc.)"
|
||||
sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
|
||||
clean up all the cumbersome methods that have to be implemented for new quantifiers (e.g., the n_classes_ property, etc.)
|
||||
make GridSearchQ truly parallel
|
||||
|
|
|
|||
|
|
@ -224,8 +224,6 @@
|
|||
<li><a href="quapy.html#quapy.util.create_parent_dir">create_parent_dir() (in module quapy.util)</a>
|
||||
</li>
|
||||
<li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions">cross_generate_predictions() (in module quapy.method.aggregative)</a>
|
||||
</li>
|
||||
<li><a href="quapy.method.html#quapy.method.aggregative.cross_generate_predictions_depr">cross_generate_predictions_depr() (in module quapy.method.aggregative)</a>
|
||||
</li>
|
||||
<li><a href="quapy.html#quapy.model_selection.cross_val_predict">cross_val_predict() (in module quapy.model_selection)</a>
|
||||
</li>
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -316,11 +316,14 @@ fitting <cite>TruncatedSVD</cite> and then <cite>LogisticRegression</cite> on th
|
|||
|
||||
<dl class="py method">
|
||||
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params">
|
||||
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">deep</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Get hyper-parameters for this estimator.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><p><strong>deep</strong> – compatibility with sklearn</p>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
<dd class="field-even"><p>a dictionary with parameter names mapped to their values</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</dd></dl>
|
||||
|
|
@ -524,7 +527,7 @@ dimensionality of the embedding</p>
|
|||
|
||||
<dl class="py class">
|
||||
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer">
|
||||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span 
class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cpu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition">¶</a></dt>
|
||||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span 
class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cuda'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||||
<p>Trains a neural network for text classification.</p>
|
||||
<dl class="field-list simple">
|
||||
|
|
|
|||
|
|
@ -447,8 +447,8 @@ index.</p>
|
|||
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
||||
prevalence values are not specified, then returns the index of a uniform sampling.
|
||||
For each class, the sampling is drawn without replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or with replacement otherwise.</p>
|
||||
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
|
||||
the actual prevalence of the class, or without replacement otherwise.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
|
|
@ -534,7 +534,7 @@ values for each class)</p>
|
|||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
|
||||
<span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
|
||||
without replacement if the requested size is greater than the number of instances, or with replacement
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
|
|
@ -553,7 +553,7 @@ otherwise.</p>
|
|||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
|
||||
<span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
||||
without replacement if the requested size is greater than the number of instances, or with replacement
|
||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||||
otherwise.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@
|
|||
</section>
|
||||
<section id="module-quapy.error">
|
||||
<span id="quapy-error"></span><h2>quapy.error<a class="headerlink" href="#module-quapy.error" title="Permalink to this heading">¶</a></h2>
|
||||
<p>Implementation of error measures used for quantification</p>
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.absolute_error">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition">¶</a></dt>
|
||||
|
|
@ -86,8 +87,9 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.acc_error">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acc_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acc_error" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
|
||||
<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
|
||||
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as
|
||||
<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing
|
||||
for true positives, false positives, false negatives, and true negatives,
|
||||
respectively</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
|
|
@ -105,8 +107,9 @@ respectively</p>
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.acce">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acce" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
|
||||
<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
|
||||
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as
|
||||
<span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing
|
||||
for true positives, false positives, false negatives, and true negatives,
|
||||
respectively</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
|
|
@ -146,10 +149,12 @@ where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.f1_error">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1_error" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
|
||||
where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
|
||||
with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
|
||||
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
|
||||
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e.,
|
||||
<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall,
|
||||
defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing
|
||||
for true positives, false positives, and false negatives, respectively.
|
||||
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
|
||||
and then averaged.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
|
|
@ -166,10 +171,12 @@ with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true pos
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.f1e">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1e</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1e" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
|
||||
where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
|
||||
with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
|
||||
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
|
||||
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e.,
|
||||
<span class="math notranslate nohighlight">\(1-F_1^M\)</span>, where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall,
|
||||
defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>, with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing
|
||||
for true positives, false positives, and false negatives, respectively.
|
||||
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently,
|
||||
and then averaged.</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
|
|
@ -186,7 +193,8 @@ with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true pos
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.from_name">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">from_name</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">err_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.from_name" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite> will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
|
||||
<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite>
|
||||
will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><p><strong>err_name</strong> – string, the error name</p>
|
||||
|
|
@ -199,11 +207,13 @@ with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true pos
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.kld">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="simple">
|
||||
<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
|
||||
<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>, where
|
||||
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
|
||||
<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
|
||||
is computed as
|
||||
<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=
|
||||
\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>,
|
||||
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
</dd>
|
||||
</dl>
|
||||
|
|
@ -212,9 +222,10 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
|
|||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain
|
||||
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
|
||||
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
|
||||
(which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -231,7 +242,8 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -248,7 +260,8 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -259,17 +272,21 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.mean_relative_absolute_error">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across
|
||||
the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see
|
||||
<a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true
|
||||
distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
|
||||
with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
|
||||
the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -281,16 +298,20 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.mkld">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mkld" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the sample pairs.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the
|
||||
sample pairs. The distributions are smoothed using the <cite>eps</cite> factor
|
||||
(see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. KLD is not defined in cases in which the distributions contain
|
||||
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
|
||||
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
|
||||
(which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -302,16 +323,19 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.mnkld">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mnkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mnkld" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>) across the sample pairs.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>)
|
||||
across the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor
|
||||
(see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain
|
||||
zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size.
|
||||
If <cite>eps=None</cite>, the sample size will be taken from the environment variable <cite>SAMPLE_SIZE</cite>
|
||||
(which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -322,17 +346,21 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.mrae">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across
|
||||
the sample pairs. The distributions are smoothed using the <cite>eps</cite> factor (see
|
||||
<a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted
|
||||
prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>mrae</cite> is not defined in cases in which the true
|
||||
distribution contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>,
|
||||
with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size will be taken from
|
||||
the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -348,8 +376,10 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the
|
||||
true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_samples, n_classes,)</cite> with the
|
||||
predicted prevalence values</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -360,10 +390,12 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.nkld">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="simple">
|
||||
<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
|
||||
is computed as <span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>, where
|
||||
<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and
|
||||
<span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
|
||||
<span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>,
|
||||
where
|
||||
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
</dd>
|
||||
|
|
@ -373,9 +405,10 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
|
|||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. NKLD is not defined in cases in which the distributions
|
||||
contain zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample
|
||||
size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
|
||||
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -386,10 +419,12 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.rae">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="simple">
|
||||
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
|
||||
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
|
||||
<dt>Computes the relative absolute error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
|
||||
is computed as
|
||||
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
|
||||
\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
|
||||
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
</dd>
|
||||
|
|
@ -399,9 +434,10 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
|
|||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution
|
||||
contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the
|
||||
sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
|
||||
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -412,10 +448,12 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.relative_absolute_error">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="simple">
|
||||
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
|
||||
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
|
||||
<dt>Computes the relative absolute error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
|
||||
is computed as
|
||||
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=
|
||||
\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
|
||||
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
|
||||
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
|
||||
</dd>
|
||||
|
|
@ -425,9 +463,10 @@ The distributions are smoothed using the <cite>eps</cite> factor (see <a class="
|
|||
<dd class="field-odd"><ul class="simple">
|
||||
<li><p><strong>prevs</strong> – array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
|
||||
<li><p><strong>prevs_hat</strong> – array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
|
||||
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
|
||||
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
<li><p><strong>eps</strong> – smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution
|
||||
contains zeros; <cite>eps</cite> is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the
|
||||
sample size. If <cite>eps=None</cite>, the sample size will be taken from the environment variable
|
||||
<cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||||
|
|
@ -438,10 +477,11 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.error.se">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition">¶</a></dt>
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><dl class="simple">
|
||||
<dt>Computes the squared error between the two prevalence vectors.</dt><dd><p>Squared error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
|
||||
<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>, where
|
||||
<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>,
|
||||
where
|
||||
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
|
|
@ -462,7 +502,8 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
|
|||
<dt class="sig sig-object py" id="quapy.error.smooth">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Smooths a prevalence distribution with <span class="math notranslate nohighlight">\(\epsilon\)</span> (<cite>eps</cite>) as:
|
||||
<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
|
||||
<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+
|
||||
\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
|
||||
<dl class="field-list simple">
|
||||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||||
<dd class="field-odd"><ul class="simple">
|
||||
|
|
@ -601,7 +642,7 @@ convenient or not. Set to False to deactivate.</p></li>
|
|||
</div>
|
||||
<span class="target" id="module-quapy.protocol"></span><dl class="py class">
|
||||
<dt class="sig sig-object py" id="quapy.protocol.APP">
|
||||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition">¶</a></dt>
|
||||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sanity_check</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
|
||||
<p>Implementation of the artificial prevalence protocol (APP).
|
||||
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
|
||||
|
|
@ -621,6 +662,8 @@ grid (default is 21)</p></li>
|
|||
<li><p><strong>smooth_limits_epsilon</strong> – the quantity to add and subtract to the limits 0 and 1</p></li>
|
||||
<li><p><strong>random_state</strong> – allows replicating samples across runs (default 0, meaning that the sequence of samples
|
||||
will be the same every time the protocol is called)</p></li>
|
||||
<li><p><strong>sanity_check</strong> – int, raises an exception warning the user that the number of examples to be generated exceeds
|
||||
this number; set to None for skipping this check</p></li>
|
||||
<li><p><strong>return_type</strong> – set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
|
||||
to “labelled_collection” to get instead instances of LabelledCollection</p></li>
|
||||
</ul>
|
||||
|
|
@ -1819,6 +1862,7 @@ this function is invoked, it loads the pickled resource. Example:</p>
|
|||
</section>
|
||||
<section id="module-quapy">
|
||||
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy" title="Permalink to this heading">¶</a></h2>
|
||||
<p>QuaPy module for quantification</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
|
|
|||
|
|
@ -1064,11 +1064,6 @@ validation data, or as an integer, indicating that the misclassification rates s
|
|||
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions" title="Permalink to this definition">¶</a></dt>
|
||||
<dd></dd></dl>
|
||||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.method.aggregative.cross_generate_predictions_depr">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">cross_generate_predictions_depr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">probabilistic</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.cross_generate_predictions_depr" title="Permalink to this definition">¶</a></dt>
|
||||
<dd></dd></dl>
|
||||
|
||||
<dl class="py function">
|
||||
<dt class="sig sig-object py" id="quapy.method.aggregative.newELM">
|
||||
<span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">newELM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'01'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.newELM" title="Permalink to this definition">¶</a></dt>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,152 @@
|
|||
from copy import deepcopy
|
||||
|
||||
import quapy as qp
|
||||
from sklearn.calibration import CalibratedClassifierCV
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from quapy.classification.methods import LowRankLogisticRegression
|
||||
from quapy.method.meta import QuaNet
|
||||
from quapy.protocol import APP
|
||||
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, HDy, newSVMAE
|
||||
from quapy.method.meta import EHDy
|
||||
import numpy as np
|
||||
import os
|
||||
import pickle
|
||||
import itertools
|
||||
import argparse
|
||||
import torch
|
||||
import shutil
|
||||
|
||||
|
||||
# Number of parallel jobs used for the non-CUDA quantifiers (handed to qp.util.parallel).
N_JOBS = -1
# Number of parallel jobs used for the QuaNet (CUDA) experiments (handed to qp.util.parallel).
CUDA_N_JOBS = 2
# NOTE(review): not referenced in the visible part of this script -- presumably meant for ensembles.
ENSEMBLE_N_JOBS = -1

# Sample size registered in the QuaPy environment for this script.
qp.environ['SAMPLE_SIZE'] = 100
|
||||
|
||||
|
||||
def newLR():
    """Return a new, unfitted logistic regression (lbfgs solver, at most 1000 iterations, n_jobs=-1)."""
    settings = dict(max_iter=1000, solver='lbfgs', n_jobs=-1)
    return LogisticRegression(**settings)
|
||||
|
||||
|
||||
def calibratedLR():
    """Return a new, unfitted logistic regression wrapped in ``CalibratedClassifierCV``."""
    base_classifier = LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
    return CalibratedClassifierCV(base_classifier)
|
||||
|
||||
|
||||
# Hyper-parameter grids explored during model selection.
# Seven log-spaced values of C: 1e-3, 1e-2, ..., 1e3.
__C_range = np.logspace(-3, 3, 7)
# Grid for the SVMperf-based quantifier: only C is explored.
svmperf_params = {'classifier__C': __C_range}
# Grid for the logistic-regression-based quantifiers: C and the class weighting.
lr_params = {'classifier__C': __C_range, 'classifier__class_weight': [None, 'balanced']}
|
||||
|
||||
|
||||
def quantification_models():
    """
    Generate the CPU quantifiers to be evaluated.

    Yields triples ``(name, quantifier, param_grid)``; every quantifier is created lazily,
    right before it is yielded, and wraps its own fresh classifier.
    """
    # quantifiers that only need a logistic regression as their single constructor argument
    lr_based = (
        ('cc', CC),
        ('acc', ACC),
        ('pcc', PCC),
        ('pacc', PACC),
        ('MAX', MAX),
        ('MS', MS),
        ('MS2', MS2),
    )
    for name, quantifier_class in lr_based:
        yield name, quantifier_class(newLR()), lr_params

    # quantifiers that need a dedicated construction
    yield 'sldc', EMQ(newLR(), recalib='platt'), lr_params
    yield 'svmmae', newSVMAE(), svmperf_params
    yield 'hdy', HDy(newLR()), lr_params
|
||||
|
||||
|
||||
def quantification_cuda_models():
    """
    Generate the quantifiers that can run on a GPU (currently only QuaNet).

    Yields triples ``(name, quantifier, param_grid)``. The device is 'cuda' when torch reports
    that CUDA is available and 'cpu' otherwise. Reads the checkpoint directory from the
    module-level ``args``.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f'Running QuaNet in {device}')
    quanet = QuaNet(LowRankLogisticRegression(), checkpointdir=args.checkpointdir, device=device)
    yield 'quanet', quanet, lr_params
|
||||
|
||||
|
||||
def evaluate_experiment(true_prevalences, estim_prevalences):
    """
    Print the MAE and MRAE between the true and the estimated prevalence values.

    :param true_prevalences: the true prevalence values
    :param estim_prevalences: the prevalence values predicted by the quantifier
    """
    header = '\nEvaluation Metrics:\n' + '=' * 22
    print(header)
    for measure in (qp.error.mae, qp.error.mrae):
        score = measure(true_prevalences, estim_prevalences)
        print(f'\t{measure.__name__}={score:.4f}')
    print()
|
||||
|
||||
|
||||
def result_path(path, dataset_name, model_name, run, optim_loss):
    """
    Compose the path of the pickle file that stores the results of one experiment run.

    :param path: directory in which results are stored
    :param dataset_name: name of the dataset
    :param model_name: name of the quantification method
    :param run: index of the run (fold)
    :param optim_loss: name of the loss optimized during model selection
    :return: ``<path>/<dataset_name>-<model_name>-run<run>-<optim_loss>.pkl``
    """
    filename = f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl'
    return os.path.join(path, filename)
|
||||
|
||||
|
||||
def is_already_computed(dataset_name, model_name, run, optim_loss):
    """Tell whether the result file of this experiment run already exists under ``args.results``."""
    target = result_path(args.results, dataset_name, model_name, run, optim_loss)
    return os.path.exists(target)
|
||||
|
||||
|
||||
def save_results(dataset_name, model_name, run, optim_loss, *results):
    """
    Pickle ``results`` (as a tuple) into the result file of this experiment run.

    The file is located under ``args.results``; its parent directory is created through
    ``qp.util.create_parent_dir`` before writing.
    """
    destination = result_path(args.results, dataset_name, model_name, run, optim_loss)
    qp.util.create_parent_dir(destination)
    payload = tuple(results)
    with open(destination, 'wb') as pickle_file:
        pickle.dump(payload, pickle_file, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
def run(experiment):
    """
    Run one experiment, i.e., one (loss, dataset, method) combination, over a 5-fold cross-validation.

    For every fold whose result file does not exist yet, the method is (optionally) optimized via
    ``GridSearchQ`` using an APP protocol on a held-out validation split, then evaluated with an APP
    protocol on the test fold; the outcome is printed and pickled via ``save_results``.

    :param experiment: a tuple ``(optim_loss, dataset_name, (model_name, model, hyperparams))``;
        if ``hyperparams`` is None, no model selection is performed
    """
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
    # these datasets are deliberately left out of the experiments
    if dataset_name in ['acute.a', 'acute.b', 'iris.1']:
        return

    collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
    # the fold index is called "fold" (not "run") so that it does not shadow this function
    for fold, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=1)):
        if is_already_computed(dataset_name, model_name, run=fold, optim_loss=optim_loss):
            print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} run={fold+1}/5 already computed.')
            continue

        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss} run={fold+1}/5')
        # model selection (hyperparameter optimization for a quantification-oriented loss)
        train, test = data.train_test
        train, val = train.split_stratified()
        if hyperparams is not None:
            model_selection = qp.model_selection.GridSearchQ(
                deepcopy(model),
                param_grid=hyperparams,
                protocol=APP(val, n_prevalences=21, repeats=25),
                error=optim_loss,
                refit=True,
                timeout=60*60,
                verbose=True
            )
            # fit on the reduced training split only: "val" is a part of data.training, so fitting
            # on data.training would let the candidates see the very samples they are validated on
            model_selection.fit(train)
            # kept in a fold-local variable so that the next fold starts again from the pristine
            # "model" instead of from the best model fitted in this fold
            fold_model = model_selection.best_model()
            best_params = model_selection.best_params_
        else:
            fold_model = model
            fold_model.fit(data.training)
            best_params = {}

        # model evaluation
        true_prevalences, estim_prevalences = qp.evaluation.prediction(
            fold_model,
            protocol=APP(test, n_prevalences=21, repeats=100)
        )
        test_true_prevalence = data.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        save_results(dataset_name, model_name, fold, optim_loss,
                     true_prevalences, estim_prevalences,
                     data.training.prevalence(), test_true_prevalence,
                     best_params)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # command-line interface; the description now matches what the script does (it iterates over
    # qp.datasets.UCI_DATASETS, not over Twitter sentiment datasets)
    parser = argparse.ArgumentParser(description='Run experiments for quantification on the UCI datasets')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='../svm_perf_quantification',
                        help='path to the directory with svmperf')
    parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
                        help='path to the directory where to dump QuaNet checkpoints')
    # "args" is read as a module-level global by the helper functions of this script
    args = parser.parse_args()

    print(f'Result folder: {args.results}')
    np.random.seed(0)

    qp.environ['SVMPERF_HOME'] = args.svmperfpath

    optim_losses = ['mae']
    datasets = qp.datasets.UCI_DATASETS

    # CPU-based quantifiers
    models = quantification_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)

    # CUDA-based quantifiers (QuaNet), run with fewer parallel jobs
    models = quantification_cuda_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)

    # the QuaNet checkpoints are no longer needed once the experiments are over
    shutil.rmtree(args.checkpointdir, ignore_errors=True)
|
||||
|
|
@ -0,0 +1,244 @@
|
|||
from scipy.sparse import csc_matrix, csr_matrix
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer, CountVectorizer
|
||||
import numpy as np
|
||||
from joblib import Parallel, delayed
|
||||
import sklearn
|
||||
import math
|
||||
from scipy.stats import t
|
||||
|
||||
|
||||
class ContTable:
    """
    Four-cell contingency table (true/false positives/negatives) with derived counts and ratios.

    The code in this module fills it with the co-occurrence counts of one feature and one category:
    ``tp`` are the positive documents containing the feature, ``fp`` the negative documents
    containing it, ``fn`` the positive documents lacking it, and ``tn`` the remaining documents.
    """

    def __init__(self, tp=0, tn=0, fp=0, fn=0):
        self.tp, self.tn, self.fp, self.fn = tp, tn, fp, fn

    def _fraction(self, count):
        # share of `count` over the total of the four cells (fails if the table is empty)
        return (1.0 * count) / self.get_d()

    def get_d(self):
        """Total number of elements (sum of the four cells)."""
        return self.tp + self.tn + self.fp + self.fn

    def get_c(self):
        """Number of positive elements (tp + fn)."""
        return self.tp + self.fn

    def get_not_c(self):
        """Number of negative elements (tn + fp)."""
        return self.tn + self.fp

    def get_f(self):
        """Number of elements in which the feature is present (tp + fp)."""
        return self.tp + self.fp

    def get_not_f(self):
        """Number of elements in which the feature is absent (tn + fn)."""
        return self.tn + self.fn

    def p_c(self):
        return self._fraction(self.get_c())

    def p_not_c(self):
        return 1.0 - self.p_c()

    def p_f(self):
        return self._fraction(self.get_f())

    def p_not_f(self):
        return 1.0 - self.p_f()

    def p_tp(self):
        return self._fraction(self.tp)

    def p_tn(self):
        return self._fraction(self.tn)

    def p_fp(self):
        return self._fraction(self.fp)

    def p_fn(self):
        return self._fraction(self.fn)

    def tpr(self):
        """True positive rate, or 0.0 when there are no positives."""
        positives = 1.0 * self.get_c()
        if positives > 0.0:
            return self.tp / positives
        return 0.0

    def fpr(self):
        """False positive rate, or 0.0 when there are no negatives."""
        negatives = 1.0 * self.get_not_c()
        if negatives > 0.0:
            return self.fp / negatives
        return 0.0
|
||||
|
||||
|
||||
def __ig_factor(p_tc, p_t, p_c):
    """
    Compute one addend of the information gain, ``p_tc * log2(p_tc / (p_t * p_c))``.

    :return: the addend, or 0.0 when ``p_tc`` is 0 or the product ``p_t * p_c`` is 0
    """
    denominator = p_t * p_c
    if denominator == 0.0 or p_tc == 0:
        return 0.0
    return p_tc * math.log(p_tc / denominator, 2)
|
||||
|
||||
|
||||
def information_gain(cell):
    """
    Information gain of a contingency table: the sum of the four factors obtained by
    combining feature presence/absence with positive/negative class membership.
    """
    present_in_pos = __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
    present_in_neg = __ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c())
    absent_in_pos = __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c())
    absent_in_neg = __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
    return present_in_pos + present_in_neg + absent_in_pos + absent_in_neg
|
||||
|
||||
|
||||
def squared_information_gain(cell):
    """Square of the information gain of the contingency table."""
    gain = information_gain(cell)
    return gain ** 2
|
||||
|
||||
def posneg_information_gain(cell):
    """Information gain with its sign flipped when the true positive rate is below the false positive rate."""
    gain = information_gain(cell)
    return -gain if cell.tpr() < cell.fpr() else gain
|
||||
|
||||
def pos_information_gain(cell):
    """Information gain, replaced by 0 when the true positive rate is below the false positive rate."""
    if cell.tpr() < cell.fpr():
        return 0
    return information_gain(cell)
|
||||
|
||||
def pointwise_mutual_information(cell):
    """Information-gain factor restricted to the (feature present, positive class) cell."""
    joint = cell.p_tp()
    feature_marginal = cell.p_f()
    class_marginal = cell.p_c()
    return __ig_factor(joint, feature_marginal, class_marginal)
|
||||
|
||||
|
||||
def gss(cell):
    """GSS score of a contingency table: ``p_tp * p_tn - p_fp * p_fn``."""
    agreement = cell.p_tp() * cell.p_tn()
    disagreement = cell.p_fp() * cell.p_fn()
    return agreement - disagreement
|
||||
|
||||
|
||||
def chi_square(cell):
    """
    Squared GSS score divided by the product of the four marginals
    (feature present/absent, class positive/negative); 0.0 when that product is 0.
    """
    denominator = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
    if denominator == 0.0:
        return 0.0
    return gss(cell) ** 2 / denominator
|
||||
|
||||
|
||||
def conf_interval(xt, n):
    """
    Estimate an adjusted proportion and the amplitude of its 95% confidence interval.

    :param xt: number of observed occurrences
    :param n: number of trials
    :return: a tuple ``(p, amplitude)``
    """
    # squared critical value: the normal quantile is used for n>30, Student's t otherwise
    # (3.84145882069 is norm.ppf(0.5+0.95/2.0)**2)
    z2 = 3.84145882069 if n > 30 else t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2
    adjusted_n = n + z2
    p = (xt + 0.5 * z2) / adjusted_n
    amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / adjusted_n)
    return p, amplitude
|
||||
|
||||
|
||||
def strength(minPosRelFreq, minPos, maxNeg):
    """Return ``log2(2 * minPosRelFreq)`` if ``minPos`` exceeds ``maxNeg``, and 0.0 otherwise."""
    return math.log(2.0 * minPosRelFreq, 2.0) if minPos > maxNeg else 0.0
|
||||
|
||||
|
||||
# Set cancel_features=True to allow some features to be weighted as 0 (as in the original article);
# in some extremely imbalanced datasets, however, this caused all documents to become 0.
def conf_weight(cell, cancel_features=False):
    """
    Confidence-based weight of a feature, computed from its contingency table.

    :param cell: the contingency table of the feature-category pair
    :param cancel_features: if False (default), a null strength is replaced by 1e-20
    :return: the strength of the feature, or 1e-20 if it is null and ``cancel_features`` is False
    """
    pos_p, pos_amp = conf_interval(cell.tp, cell.get_c())
    neg_p, neg_amp = conf_interval(cell.fp, cell.get_not_c())

    # lower bound of the interval for the positives, upper bound of the one for the negatives
    lower_pos = pos_p - pos_amp
    upper_neg = neg_p + neg_amp
    total = lower_pos + upper_neg
    relative_frequency = lower_pos / (total if total != 0 else 1)

    weight = strength(relative_frequency, lower_pos, upper_neg)
    if weight == 0 and not cancel_features:
        return 1e-20
    return weight
|
||||
|
||||
def get_tsr_matrix(cell_matrix, tsr_score_funtion):
    """
    Apply a TSR score function to every entry of a matrix of contingency tables.

    :param cell_matrix: two-dimensional array, indexed as ``cell_matrix[c, f]``
    :param tsr_score_funtion: callable applied to each entry
    :return: a numpy array with the score of each entry, arranged as in ``cell_matrix``
    """
    n_rows = len(cell_matrix)
    n_cols = len(cell_matrix[0])
    scores = []
    for c in range(n_rows):
        row = [tsr_score_funtion(cell_matrix[c, f]) for f in range(n_cols)]
        scores.append(row)
    return np.array(scores)
|
||||
|
||||
|
||||
def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
    """
    Build the contingency table of one feature and one category.

    :param positive_document_indexes: set with the indexes of the documents labelled with the category
    :param feature_document_indexes: set with the indexes of the documents containing the feature
    :param nD: total number of documents
    :return: the corresponding :class:`ContTable`
    """
    with_feature_and_label = positive_document_indexes & feature_document_indexes
    with_feature_only = feature_document_indexes - positive_document_indexes
    with_label_only = positive_document_indexes - feature_document_indexes
    n_tp = len(with_feature_and_label)
    n_fp = len(with_feature_only)
    n_fn = len(with_label_only)
    # every document not accounted for above neither contains the feature nor has the label
    n_tn = nD - (n_tp + n_fp + n_fn)
    return ContTable(tp=n_tp, tn=n_tn, fp=n_fp, fn=n_fn)
|
||||
|
||||
def category_tables(feature_sets, category_sets, c, nD, nF):
    """Return the list with the contingency tables of category ``c`` against each of the ``nF`` features."""
    tables = []
    for f in range(nF):
        tables.append(feature_label_contingency_table(category_sets[c], feature_sets[f], nD))
    return tables
|
||||
|
||||
def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
    """
    Compute the nC x nF supervised matrix M, where M[c, f] is the 4-cell contingency table of
    feature f and class c. According to the original note, the efficiency is
    O(nF x nC x log(S)), S being the sparse factor.

    :param coocurrence_matrix: document-by-feature matrix of shape (nD, nF)
    :param label_matrix: document-by-class matrix of shape (nD, nC)
    :param n_jobs: number of threads used to process the classes in parallel
    :return: numpy array holding one :class:`ContTable` per (class, feature) pair
    :raises ValueError: if the two matrices do not have the same number of rows
    """
    n_docs, n_feats = coocurrence_matrix.shape
    n_docs_in_labels, n_classes = label_matrix.shape

    if n_docs != n_docs_in_labels:
        raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
                         (coocurrence_matrix.shape,label_matrix.shape))

    def nonzero_rows(matrix, col):
        # indexes of the rows with a non-zero value in column `col`
        column = matrix[:, col]
        return set(column.nonzero()[0])

    # the columns are sliced one by one, hence a CSR matrix is converted to CSC beforehand
    if isinstance(coocurrence_matrix, csr_matrix):
        coocurrence_matrix = csc_matrix(coocurrence_matrix)

    feature_sets = [nonzero_rows(coocurrence_matrix, f) for f in range(n_feats)]
    category_sets = [nonzero_rows(label_matrix, c) for c in range(n_classes)]

    workers = Parallel(n_jobs=n_jobs, backend="threading")
    tasks = (
        delayed(category_tables)(feature_sets, category_sets, c, n_docs, n_feats)
        for c in range(n_classes)
    )
    return np.array(workers(tasks))
|
||||
|
||||
|
||||
|
||||
class TSRweighting(BaseEstimator, TransformerMixin):
    """
    Supervised Term Weighting function based on any Term Selection Reduction (TSR) function (e.g., information gain,
    chi-square, etc.) or, more generally, on any function that could be computed on the 4-cell contingency table for
    each category-feature pair.
    The supervised_4cell_matrix (a CxF matrix containing the 4-cell contingency tables
    for each category-feature pair) can be pre-computed (e.g., during the feature selection phase) and passed as an
    argument.
    When C>1, i.e., in multiclass scenarios, a global_policy is used in order to determine a single feature-score which
    informs about its relevance. Accepted policies include "max" (takes the max score across categories), "ave" and "wave"
    (take the average, or weighted average, across all categories -- weights correspond to the class prevalence), and "sum"
    (which sums all category scores).

    :param tsr_function: callable that maps a contingency table to a score
    :param global_policy: one of "max", "ave", "wave", "sum"
    :param supervised_4cell_matrix: optional pre-computed matrix of contingency tables, of shape (nC, nF)
    :param sublinear_tf: passed to the inner ``TfidfTransformer``
    :param norm: norm applied to each weighted document; None or 'none' disables the normalization
    :param min_df: passed to the inner ``CountVectorizer``
    :param n_jobs: number of parallel jobs used when computing the contingency tables
    :raises ValueError: if ``global_policy`` is not one of the accepted values
    """

    def __init__(self, tsr_function, global_policy='max', supervised_4cell_matrix=None, sublinear_tf=True, norm='l2', min_df=3, n_jobs=-1):
        if global_policy not in ['max', 'ave', 'wave', 'sum']:
            raise ValueError('Global policy should be in {"max", "ave", "wave", "sum"}')
        self.tsr_function = tsr_function
        self.global_policy = global_policy
        self.supervised_4cell_matrix = supervised_4cell_matrix
        self.sublinear_tf = sublinear_tf
        self.norm = norm
        self.min_df = min_df
        self.n_jobs = n_jobs

    def fit(self, X, y):
        """
        Learn the vocabulary from the raw documents and the global TSR score of every feature.

        :param X: iterable of raw documents
        :param y: labels, either a vector of shape (nD,) or a matrix of shape (nD, nC)
        :return: self
        :raises ValueError: if a pre-computed ``supervised_4cell_matrix`` was given and its shape is not (nC, nF)
        """
        self.count_vectorizer = CountVectorizer(min_df=self.min_df)
        X = self.count_vectorizer.fit_transform(X)

        self.tf_vectorizer = TfidfTransformer(
            norm=None, use_idf=False, smooth_idf=False, sublinear_tf=self.sublinear_tf).fit(X)

        # plain sequences (e.g., lists) are accepted too; objects that already expose a shape are left untouched
        if not hasattr(y, 'shape'):
            y = np.asarray(y)
        if len(y.shape) == 1:
            y = np.expand_dims(y, axis=1)

        nD, nC = y.shape
        # the number of features is the number of columns of the count matrix
        nF = X.shape[1]

        # The constructor parameter is never overwritten: when no matrix was given, the tables are recomputed at
        # every call to fit (so that refitting on different data does not reuse stale tables) and are made
        # available in the fitted attribute supervised_4cell_matrix_
        if self.supervised_4cell_matrix is None:
            cell_matrix = get_supervised_matrix(X, y, n_jobs=self.n_jobs)
        else:
            cell_matrix = self.supervised_4cell_matrix
            if cell_matrix.shape != (nC, nF):
                raise ValueError("Shape of supervised information matrix is inconsistent with X and y")
        self.supervised_4cell_matrix_ = cell_matrix

        tsr_matrix = get_tsr_matrix(cell_matrix, self.tsr_function)
        if self.global_policy == 'ave':
            self.global_tsr_vector = np.average(tsr_matrix, axis=0)
        elif self.global_policy == 'wave':
            category_prevalences = [sum(y[:,c])*1.0/nD for c in range(nC)]
            self.global_tsr_vector = np.average(tsr_matrix, axis=0, weights=category_prevalences)
        elif self.global_policy == 'sum':
            self.global_tsr_vector = np.sum(tsr_matrix, axis=0)
        elif self.global_policy == 'max':
            self.global_tsr_vector = np.amax(tsr_matrix, axis=0)
        return self

    def fit_transform(self, X, y):
        """Fit on ``X`` and ``y`` and return the transformation of ``X``."""
        return self.fit(X, y).transform(X)

    def transform(self, X):
        """
        Weight the term frequencies of the documents by the global TSR score of each feature.

        :param X: iterable of raw documents
        :return: a ``csr_matrix`` with one row per document
        :raises NameError: if called before ``fit``
        """
        if not hasattr(self, 'global_tsr_vector'):
            raise NameError('TSRweighting: transform method called before fit.')
        X = self.count_vectorizer.transform(X)
        tf_X = self.tf_vectorizer.transform(X)
        # the product is carried out on the sparse matrix (the vector is broadcast across the rows), since
        # densifying the whole document-by-term matrix can exhaust the memory for large collections
        weighted_X = csr_matrix(tf_X.multiply(self.global_tsr_vector))
        if self.norm is not None and self.norm != 'none':
            weighted_X = sklearn.preprocessing.normalize(weighted_X, norm=self.norm, axis=1, copy=False)
        # features with a null score leave explicit zeros behind; drop them
        weighted_X.eliminate_zeros()
        return weighted_X
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
from data import LabelledCollection
|
||||
import numpy as np
|
||||
|
||||
from laboratory.custom_vectorizers import *
|
||||
from protocol import APP
|
||||
from quapy.method.aggregative import _get_divergence, HDy, DistributionMatching
|
||||
from quapy.method.base import BaseQuantifier
|
||||
from scipy import optimize
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# TODO: explore the bernoulli (term presence/absence) variant
|
||||
# TODO: explore the multinomial (term frequency) variant
|
||||
# TODO: explore the multinomial + length normalization variant
|
||||
# TODO: consolidate the TSR-variant (e.g., using information gain) variant;
|
||||
# - works better with the idf?
|
||||
# - works better with length normalization?
|
||||
# - etc
|
||||
|
||||
class DxS(BaseQuantifier):
    """
    Quantifier that matches distributions computed directly on the feature space: the prevalence
    estimate is the mixture of the class-wise feature distributions seen in training that
    minimizes a divergence with respect to the feature distribution of the test sample.

    :param vectorizer: optional transformer (with `fit_transform` and `transform`) applied to the
        instances; if None, the instances are used as they are
    :param divergence: the divergence to minimize, resolved through `_get_divergence`
    """

    def __init__(self, vectorizer=None, divergence='topsoe'):
        self.vectorizer = vectorizer
        self.divergence = divergence

    def __as_distribution(self, instances):
        # column-wise totals divided by the grand total, returned as a flat array
        column_totals = instances.sum(axis=0)
        return np.asarray(column_totals / instances.sum()).flatten()

    def fit(self, data: LabelledCollection):
        """
        Compute the feature distribution of each class.

        :param data: the training collection
        :return: self
        """
        text_instances, labels = data.Xy

        if self.vectorizer is not None:
            text_instances = self.vectorizer.fit_transform(text_instances, y=labels)

        # one distribution per class, following the order of data.classes_
        self.validation_distribution = np.asarray([
            self.__as_distribution(text_instances[labels == class_i]) for class_i in data.classes_
        ])
        return self

    def quantify(self, text_instances):
        """
        Estimate the class prevalence values of a sample.

        :param text_instances: the instances of the sample
        :return: array of shape `(n_classes,)` with the solution found by the optimizer
        """
        if self.vectorizer is not None:
            text_instances = self.vectorizer.transform(text_instances)

        test_distribution = self.__as_distribution(text_instances)
        divergence = _get_divergence(self.divergence)
        n_classes, _ = self.validation_distribution.shape

        def match(prev):
            # divergence between the test distribution and the mixture induced by `prev`
            mixture_distribution = (np.expand_dims(prev, axis=0) @ self.validation_distribution).flatten()
            return divergence(test_distribution, mixture_distribution)

        # the search starts from the uniform distribution...
        initial_point = np.full(fill_value=1 / n_classes, shape=(n_classes,))
        # ...and is restricted to the unit-simplex: values in [0,1] that sum up to 1
        bounds = tuple((0, 1) for _ in range(n_classes))
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
        solution = optimize.minimize(match, x0=initial_point, method='SLSQP', bounds=bounds, constraints=constraints)
        return solution.x
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Experiment driver: compares several DxS configurations (one per vectorizer) against HDy and
    # DistributionMatching on the reviews sentiment datasets, stores one row per (method, dataset)
    # in a tab-separated file, and prints the results as a pivot table.

    qp.environ['SAMPLE_SIZE'] = 250
    qp.environ['N_JOBS'] = -1
    min_df = 10
    repeats = 10
    error = 'mae'

    div = 'HD'

    def gen_methods():
        """
        Generates tuples `(dataset, method, method_name)`. The dataset travels with the method
        because DxS receives the raw documents whereas the other methods receive the
        tfidf-vectorized version of the same dataset.
        """
        # (method_name, vectorizer factory); a factory is only invoked right before its
        # quantifier is yielded, so a fresh vectorizer is created for every dataset
        dxs_variants = (
            ('DxS-Bernoulli', lambda: CountVectorizer(min_df=min_df, binary=True)),
            ('DxS-multinomial', lambda: CountVectorizer(min_df=min_df, binary=False)),
            ('DxS-TF', lambda: TfidfVectorizer(sublinear_tf=False, use_idf=False, min_df=min_df, norm=None)),
            ('DxS-logTF', lambda: TfidfVectorizer(sublinear_tf=True, use_idf=False, min_df=min_df, norm=None)),
            ('DxS-TFIDF', lambda: TfidfVectorizer(use_idf=True, min_df=min_df, norm=None)),
            ('DxS-TFIDF-l2', lambda: TfidfVectorizer(use_idf=True, min_df=min_df, norm='l2')),
            ('DxS-TFTSR-l2', lambda: TSRweighting(tsr_function=information_gain, min_df=min_df, norm='l2')),
        )
        dm_variants = ((5, 'DM-5b'), (10, 'DM-10b'))

        for dataset in qp.datasets.REVIEWS_SENTIMENT_DATASETS:
            raw = qp.datasets.fetch_reviews(dataset, tfidf=False)
            for method_name, make_vectorizer in dxs_variants:
                yield raw, DxS(divergence=div, vectorizer=make_vectorizer()), method_name

            vectorized = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=min_df)
            yield vectorized, HDy(LogisticRegression()), 'HDy'
            for nbins, method_name in dm_variants:
                yield vectorized, DistributionMatching(LogisticRegression(), divergence=div, nbins=nbins), method_name

    result_path = 'results.csv'
    with open(result_path, 'wt') as out:
        out.write('Method\tDataset\tMAE\tMRAE\n')
        for data, quantifier, quant_name in gen_methods():
            quantifier.fit(data.training)
            protocol = APP(data.test, repeats=repeats)
            report = qp.evaluation.evaluation_report(
                quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True
            )
            means = report.mean()
            out.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')

    # the file is read back only once it has been closed
    df = pd.read_csv(result_path, sep='\t')

    pv = df.pivot_table(index='Method', columns="Dataset", values=["MAE", "MRAE"])
    print(pv)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
from typing import Union, Callable
|
||||
import numpy as np
|
||||
from sklearn.base import BaseEstimator
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
import pandas as pd
|
||||
from sklearn.neighbors import KernelDensity
|
||||
|
||||
import quapy as qp
|
||||
from data import LabelledCollection
|
||||
from protocol import APP, UPP
|
||||
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
|
||||
DistributionMatching, _get_divergence
|
||||
import scipy
|
||||
from scipy import optimize
|
||||
|
||||
|
||||
class KDEy(AggregativeProbabilisticQuantifier):
    """
    Distribution-matching quantifier that models posterior probabilities with kernel density
    estimates (KDE) instead of histograms.

    During :meth:`fit`, one KDE is fitted per class on the posterior probabilities of the
    validation instances labelled with that class. During :meth:`aggregate`, a KDE is fitted on the
    posteriors of the test sample, and the returned prevalence vector is the mixture weight that
    makes the mixture of the class-wise densities closest, in terms of `divergence`, to the test
    density (both evaluated at the test posteriors).

    :param classifier: the classifier; it is handed to `cross_generate_predictions` with
        `probabilistic=True`
    :param val_split: default validation policy used by :meth:`fit` when none is passed explicitly
    :param divergence: name of a divergence or a callable, resolved through `_get_divergence`
    :param bandwidth_method: one of `KDEy.BANDWIDTH_METHOD`; passed verbatim to the KDE backend
    :param engine: one of `KDEy.ENGINE`, the library used to fit and evaluate the densities
    :param n_jobs: forwarded to `cross_generate_predictions`
    """

    # NOTE(review): 'auto' is accepted here but is forwarded untouched to the backend; confirm that
    # the chosen engine actually understands it
    BANDWIDTH_METHOD = ['auto', 'scott', 'silverman']
    ENGINE = ['scipy', 'sklearn']

    def __init__(self, classifier: BaseEstimator, val_split=0.4, divergence: Union[str, Callable]='HD',
                 bandwidth_method='scott', engine='sklearn', n_jobs=None):
        self.classifier = classifier
        self.val_split = val_split
        self.divergence = divergence
        self.bandwidth_method = bandwidth_method
        self.engine = engine
        self.n_jobs = n_jobs
        assert bandwidth_method in KDEy.BANDWIDTH_METHOD, f'unknown bandwidth_method, valid ones are {KDEy.BANDWIDTH_METHOD}'
        assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'

    def get_kde(self, posteriors):
        """
        Fits a kernel density estimator on a set of posterior probabilities using the configured engine.

        :param posteriors: array of posterior probabilities, one row per instance
        :return: the fitted density object (a `gaussian_kde` for engine 'scipy', a `KernelDensity`
            for engine 'sklearn')
        :raises ValueError: if `self.engine` is not one of `KDEy.ENGINE`
        """
        if self.engine == 'scipy':
            # imported here so that the `stats` submodule is guaranteed to be loaded; a bare
            # `import scipy` does not necessarily expose `scipy.stats` as an attribute
            from scipy.stats import gaussian_kde

            # scipy treats columns as datapoints, and needs the datapoints not to lie in a
            # lower-dimensional subspace, which requires removing the last dimension (it is
            # constrained by the others)
            kde = gaussian_kde(posteriors[:, :-1].T)
            kde.set_bandwidth(self.bandwidth_method)
            return kde
        if self.engine == 'sklearn':
            return KernelDensity(bandwidth=self.bandwidth_method).fit(posteriors)
        # the engine may have been altered after construction; fail explicitly rather than with
        # an unbound local variable
        raise ValueError(f'unknown engine, valid ones are {KDEy.ENGINE}')

    def pdf(self, kde, posteriors):
        """
        Evaluates a fitted density at the given posterior probabilities.

        :param kde: a density object as returned by :meth:`get_kde` for the current engine
        :param posteriors: array of posterior probabilities, one row per instance
        :return: the density values, one per row of `posteriors`
        :raises ValueError: if `self.engine` is not one of `KDEy.ENGINE`
        """
        if self.engine == 'scipy':
            # same layout as in get_kde: last dimension dropped, datapoints as columns
            return kde(posteriors[:, :-1].T)
        if self.engine == 'sklearn':
            # score_samples returns log-densities
            return np.exp(kde.score_samples(posteriors))
        # previously an unknown engine silently returned None
        raise ValueError(f'unknown engine, valid ones are {KDEy.ENGINE}')

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
        """
        Trains the classifier (if requested) and fits the validation densities: one kernel density
        estimator per class, each fitted on the posterior probabilities of the validation instances
        labelled with that class. The densities are stored in `self.val_densities` and the validation
        posteriors in `self.val_posteriors`.

        :param data: the training set
        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
        :param val_split: validation policy, forwarded unchanged to `cross_generate_predictions`;
            either a float in (0,1) indicating the proportion of training instances to use for
            validation (e.g., 0.3 for using 30% of the training set as validation data), or a
            LabelledCollection indicating the validation set itself, or an int indicating the number k
            of folds to be used in kFCV. If None, `self.val_split` is used.
        :return: self
        """
        if val_split is None:
            val_split = self.val_split

        # the last two returned values (classes and class counts) are not needed here
        self.classifier, y, posteriors, _, _ = cross_generate_predictions(
            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
        )

        # assumes the labels in `y` are the integers 0..n_classes-1 -- TODO confirm
        self.val_densities = [self.get_kde(posteriors[y == cat]) for cat in range(data.n_classes)]
        self.val_posteriors = posteriors

        return self

    def val_pdf(self, prev):
        """
        Returns a function that computes the mixture model with the given prev as mixture factor

        :param prev: a prevalence vector, ndarray
        :return: a function mapping posteriors to the density of the mixture of the validation
            densities, each weighted by the corresponding component of `prev`
        """
        return lambda posteriors: sum(prev_i * self.pdf(kde_i, posteriors) for kde_i, prev_i in zip(self.val_densities, prev))

    def aggregate(self, posteriors: np.ndarray):
        """
        Searches for the mixture model parameter (the sought prevalence values) that yields a validation
        density (the mixture of the class-wise densities) that best matches the test density, in terms of
        the divergence measure of choice. Both densities are evaluated at the test posteriors. The search
        uses SLSQP, starts from the uniform prevalence, and is restricted to the unit simplex.

        :param posteriors: posterior probabilities of the instances in the sample
        :return: a vector of class prevalence estimates
        """
        test_density = self.get_kde(posteriors)
        test_likelihood = self.pdf(test_density, posteriors)
        divergence = _get_divergence(self.divergence)

        n_classes = len(self.val_densities)

        def match(prev):
            val_pdf = self.val_pdf(prev)
            val_likelihood = val_pdf(posteriors)
            return divergence(val_likelihood, test_likelihood)

        # the initial point is set as the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))

        # solutions are bounded to those contained in the unit-simplex
        bounds = tuple((0, 1) for _ in range(n_classes))  # values in [0,1]
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Experiment driver: compares KDEy against DistributionMatching on the twitter sentiment test
    # datasets, stores one row per (method, dataset) in a tab-separated file, and prints the
    # results as a pivot table.

    qp.environ['SAMPLE_SIZE'] = 100
    qp.environ['N_JOBS'] = -1
    div = 'HD'

    def gen_methods():
        """
        Generates tuples `(dataset, method, method_name)`; the dataset travels with the method
        because some methods may need the dataset to be processed differently.
        """
        # other variants tried during development (KDEy with 'scott' bandwidth, DM with 10 bins)
        # are currently disabled
        for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)

            yield (
                data,
                KDEy(LogisticRegression(), divergence=div, bandwidth_method='silverman', engine='sklearn'),
                f'KDEy-{div}-silverman',
            )

            yield data, DistributionMatching(LogisticRegression(), divergence=div, nbins=5), f'DM-5b-{div}'

    result_path = 'results_kdey.csv'
    with open(result_path, 'wt') as out:
        out.write('Method\tDataset\tMAE\tMRAE\n')
        for data, quantifier, quant_name in gen_methods():
            quantifier.fit(data.training)
            protocol = UPP(data.test, repeats=100)
            report = qp.evaluation.evaluation_report(
                quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True
            )
            means = report.mean()
            out.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
            # partial results are pushed to disk after every method
            out.flush()

    df = pd.read_csv(result_path, sep='\t')

    # show the complete table, without truncating rows or columns
    for option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(option, None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
"""QuaPy module for quantification"""
|
||||
from quapy.data import datasets
|
||||
from . import error
|
||||
from . import data
|
||||
from quapy.data import datasets
|
||||
from . import functional
|
||||
# from . import method
|
||||
from . import evaluation
|
||||
|
|
@ -25,7 +26,8 @@ environ = {
|
|||
|
||||
def _get_njobs(n_jobs):
|
||||
"""
|
||||
If `n_jobs` is None, then it returns `environ['N_JOBS']`; if otherwise, returns `n_jobs`.
|
||||
If `n_jobs` is None, then it returns `environ['N_JOBS']`;
|
||||
if otherwise, returns `n_jobs`.
|
||||
|
||||
:param n_jobs: the number of `n_jobs` or None if not specified
|
||||
:return: int
|
||||
|
|
@ -35,7 +37,8 @@ def _get_njobs(n_jobs):
|
|||
|
||||
def _get_sample_size(sample_size):
|
||||
"""
|
||||
If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; if otherwise, returns `sample_size`.
|
||||
If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`;
|
||||
if otherwise, returns `sample_size`.
|
||||
If none of these are set, then a ValueError exception is raised.
|
||||
|
||||
:param sample_size: integer or None
|
||||
|
|
@ -45,6 +48,3 @@ def _get_sample_size(sample_size):
|
|||
if sample_size is None:
|
||||
raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified')
|
||||
return sample_size
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
|
||||
def __init__(self, n_components=100, **kwargs):
|
||||
self.n_components = n_components
|
||||
self.learner = LogisticRegression(**kwargs)
|
||||
self.classifier = LogisticRegression(**kwargs)
|
||||
|
||||
def get_params(self):
|
||||
"""
|
||||
|
|
@ -28,7 +28,7 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
:return: a dictionary with parameter names mapped to their values
|
||||
"""
|
||||
params = {'n_components': self.n_components}
|
||||
params.update(self.learner.get_params())
|
||||
params.update(self.classifier.get_params())
|
||||
return params
|
||||
|
||||
def set_params(self, **params):
|
||||
|
|
@ -43,7 +43,7 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
if 'n_components' in params_:
|
||||
self.n_components = params_['n_components']
|
||||
del params_['n_components']
|
||||
self.learner.set_params(**params_)
|
||||
self.classifier.set_params(**params_)
|
||||
|
||||
def fit(self, X, y):
|
||||
"""
|
||||
|
|
@ -59,8 +59,8 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
if nF > self.n_components:
|
||||
self.pca = TruncatedSVD(self.n_components).fit(X)
|
||||
X = self.transform(X)
|
||||
self.learner.fit(X, y)
|
||||
self.classes_ = self.learner.classes_
|
||||
self.classifier.fit(X, y)
|
||||
self.classes_ = self.classifier.classes_
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
|
|
@ -72,7 +72,7 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
instances in `X`
|
||||
"""
|
||||
X = self.transform(X)
|
||||
return self.learner.predict(X)
|
||||
return self.classifier.predict(X)
|
||||
|
||||
def predict_proba(self, X):
|
||||
"""
|
||||
|
|
@ -82,7 +82,7 @@ class LowRankLogisticRegression(BaseEstimator):
|
|||
:return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
|
||||
"""
|
||||
X = self.transform(X)
|
||||
return self.learner.predict_proba(X)
|
||||
return self.classifier.predict_proba(X)
|
||||
|
||||
def transform(self, X):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -207,7 +207,7 @@ def fetch_UCIDataset(dataset_name, data_home=None, test_split=0.3, verbose=False
|
|||
return Dataset(*data.split_stratified(1 - test_split, random_state=0))
|
||||
|
||||
|
||||
def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> Dataset:
|
||||
def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
|
||||
"""
|
||||
Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in
|
||||
`Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||||
|
|
@ -223,7 +223,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
|
|||
|
||||
>>> import quapy as qp
|
||||
>>> collection = qp.datasets.fetch_UCILabelledCollection("yeast")
|
||||
>>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
|
||||
>>> for data in qp.domains.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
|
||||
>>> ...
|
||||
|
||||
The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`
|
||||
|
|
@ -233,7 +233,7 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->
|
|||
~/quay_data/ directory)
|
||||
:param test_split: proportion of documents to be included in the test set. The rest conforms the training set
|
||||
:param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
|
||||
:return: a :class:`quapy.data.base.Dataset` instance
|
||||
:return: a :class:`quapy.data.base.LabelledCollection` instance
|
||||
"""
|
||||
|
||||
assert dataset_name in UCI_DATASETS, \
|
||||
|
|
|
|||
237
quapy/error.py
237
quapy/error.py
|
|
@ -1,10 +1,13 @@
|
|||
import quapy as qp
|
||||
"""Implementation of error measures used for quantification"""
|
||||
|
||||
import numpy as np
|
||||
from sklearn.metrics import f1_score
|
||||
import quapy as qp
|
||||
|
||||
|
||||
def from_name(err_name):
|
||||
"""Gets an error function from its name. E.g., `from_name("mae")` will return function :meth:`quapy.error.mae`
|
||||
"""Gets an error function from its name. E.g., `from_name("mae")`
|
||||
will return function :meth:`quapy.error.mae`
|
||||
|
||||
:param err_name: string, the error name
|
||||
:return: a callable implementing the requested error
|
||||
|
|
@ -15,10 +18,12 @@ def from_name(err_name):
|
|||
|
||||
|
||||
def f1e(y_true, y_pred):
|
||||
"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., :math:`1-F_1^M`,
|
||||
where :math:`F_1` is the harmonic mean of precision and recall, defined as :math:`\\frac{2tp}{2tp+fp+fn}`,
|
||||
with `tp`, `fp`, and `fn` standing for true positives, false positives, and false negatives, respectively.
|
||||
`Macro` averaging means the :math:`F_1` is computed for each category independently, and then averaged.
|
||||
"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e.,
|
||||
:math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall,
|
||||
defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing
|
||||
for true positives, false positives, and false negatives, respectively.
|
||||
`Macro` averaging means the :math:`F_1` is computed for each category independently,
|
||||
and then averaged.
|
||||
|
||||
:param y_true: array-like of true labels
|
||||
:param y_pred: array-like of predicted labels
|
||||
|
|
@ -28,8 +33,9 @@ def f1e(y_true, y_pred):
|
|||
|
||||
|
||||
def acce(y_true, y_pred):
|
||||
"""Computes the error in terms of 1-accuracy. The accuracy is computed as :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with
|
||||
`tp`, `fp`, `fn`, and `tn` standing for true positives, false positives, false negatives, and true negatives,
|
||||
"""Computes the error in terms of 1-accuracy. The accuracy is computed as
|
||||
:math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing
|
||||
for true positives, false positives, false negatives, and true negatives,
|
||||
respectively
|
||||
|
||||
:param y_true: array-like of true labels
|
||||
|
|
@ -43,7 +49,8 @@ def mae(prevs, prevs_hat):
|
|||
"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
|
||||
prevalence values
|
||||
:return: mean absolute error
|
||||
"""
|
||||
return ae(prevs, prevs_hat).mean()
|
||||
|
|
@ -52,7 +59,7 @@ def mae(prevs, prevs_hat):
|
|||
def ae(prevs, prevs_hat):
|
||||
"""Computes the absolute error between the two prevalence vectors.
|
||||
Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
|
||||
:math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}|\\hat{p}(y)-p(y)|`,
|
||||
:math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,
|
||||
where :math:`\\mathcal{Y}` are the classes of interest.
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
|
|
@ -66,129 +73,153 @@ def ae(prevs, prevs_hat):
|
|||
def mse(prevs, prevs_hat):
|
||||
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the
|
||||
true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the
|
||||
predicted prevalence values
|
||||
:return: mean squared error
|
||||
"""
|
||||
return se(prevs, prevs_hat).mean()
|
||||
|
||||
|
||||
def se(p, p_hat):
|
||||
def se(prevs, prevs_hat):
|
||||
"""Computes the squared error between the two prevalence vectors.
|
||||
Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
|
||||
:math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}(\\hat{p}(y)-p(y))^2`, where
|
||||
:math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,
|
||||
where
|
||||
:math:`\\mathcal{Y}` are the classes of interest.
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:return: squared error
|
||||
"""
|
||||
return ((p_hat-p)**2).mean(axis=-1)
|
||||
return ((prevs_hat - prevs) ** 2).mean(axis=-1)
|
||||
|
||||
|
||||
def mkld(prevs, prevs_hat, eps=None):
|
||||
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the sample pairs.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
|
||||
sample pairs. The distributions are smoothed using the `eps` factor
|
||||
(see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true
|
||||
prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
|
||||
prevalence values
|
||||
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
|
||||
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
|
||||
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
|
||||
(which has thus to be set beforehand).
|
||||
:return: mean Kullback-Leibler divergence
|
||||
"""
|
||||
return kld(prevs, prevs_hat, eps).mean()
|
||||
|
||||
|
||||
def kld(p, p_hat, eps=None):
|
||||
def kld(prevs, prevs_hat, eps=None):
|
||||
"""Computes the Kullback-Leibler divergence between the two prevalence distributions.
|
||||
Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` is computed as
|
||||
:math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`, where
|
||||
:math:`\\mathcal{Y}` are the classes of interest.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: Kullback-Leibler divergence between the two distributions
|
||||
"""
|
||||
eps = __check_eps(eps)
|
||||
sp = p+eps
|
||||
sp_hat = p_hat + eps
|
||||
return (sp*np.log(sp/sp_hat)).sum(axis=-1)
|
||||
|
||||
|
||||
def mnkld(prevs, prevs_hat, eps=None):
|
||||
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`) across the sample pairs.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: mean Normalized Kullback-Leibler distribution
|
||||
"""
|
||||
return nkld(prevs, prevs_hat, eps).mean()
|
||||
|
||||
|
||||
def nkld(p, p_hat, eps=None):
|
||||
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
|
||||
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
|
||||
is computed as :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`, where
|
||||
:math:`\\mathcal{Y}` are the classes of interest.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: Normalized Kullback-Leibler divergence between the two distributions
|
||||
"""
|
||||
ekld = np.exp(kld(p, p_hat, eps))
|
||||
return 2. * ekld / (1 + ekld) - 1.
|
||||
|
||||
|
||||
def mrae(p, p_hat, eps=None):
|
||||
"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across the sample pairs.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. `mrae` is not defined in cases in which the true distribution contains zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: mean relative absolute error
|
||||
"""
|
||||
return rae(p, p_hat, eps).mean()
|
||||
|
||||
|
||||
def rae(p, p_hat, eps=None):
|
||||
"""Computes the absolute relative error between the two prevalence vectors.
|
||||
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
|
||||
:math:`RAE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
|
||||
Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
|
||||
is computed as
|
||||
:math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=
|
||||
\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,
|
||||
where :math:`\\mathcal{Y}` are the classes of interest.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. `rae` is not defined in cases in which the true distribution contains zeros; `eps`
|
||||
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
|
||||
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
|
||||
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
|
||||
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
|
||||
(which has thus to be set beforehand).
|
||||
:return: Kullback-Leibler divergence between the two distributions
|
||||
"""
|
||||
eps = __check_eps(eps)
|
||||
smooth_prevs = prevs + eps
|
||||
smooth_prevs_hat = prevs_hat + eps
|
||||
return (smooth_prevs*np.log(smooth_prevs/smooth_prevs_hat)).sum(axis=-1)
|
||||
|
||||
|
||||
def mnkld(prevs, prevs_hat, eps=None):
|
||||
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)
|
||||
across the sample pairs. The distributions are smoothed using the `eps` factor
|
||||
(see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
|
||||
prevalence values
|
||||
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain
|
||||
zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
|
||||
If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
|
||||
(which has thus to be set beforehand).
|
||||
:return: mean Normalized Kullback-Leibler divergence
|
||||
"""
|
||||
return nkld(prevs, prevs_hat, eps).mean()
|
||||
|
||||
|
||||
def nkld(prevs, prevs_hat, eps=None):
|
||||
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
|
||||
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and
|
||||
:math:`\\hat{p}` is computed as
|
||||
:math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,
|
||||
where
|
||||
:math:`\\mathcal{Y}` are the classes of interest.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions
|
||||
contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample
|
||||
size. If `eps=None`, the sample size will be taken from the environment variable
|
||||
`SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: Normalized Kullback-Leibler divergence between the two distributions
|
||||
"""
|
||||
ekld = np.exp(kld(prevs, prevs_hat, eps))
|
||||
return 2. * ekld / (1 + ekld) - 1.
|
||||
|
||||
|
||||
def mrae(prevs, prevs_hat, eps=None):
|
||||
"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across
|
||||
the sample pairs. The distributions are smoothed using the `eps` factor (see
|
||||
:meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true
|
||||
prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
|
||||
prevalence values
|
||||
:param eps: smoothing factor. `mrae` is not defined in cases in which the true
|
||||
distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,
|
||||
with :math:`T` the sample size. If `eps=None`, the sample size will be taken from
|
||||
the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: mean relative absolute error
|
||||
"""
|
||||
return rae(prevs, prevs_hat, eps).mean()
|
||||
|
||||
|
||||
def rae(prevs, prevs_hat, eps=None):
|
||||
"""Computes the relative absolute error between the two prevalence vectors.
|
||||
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
|
||||
is computed as
|
||||
:math:`RAE(p,\\hat{p})=
|
||||
\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
|
||||
where :math:`\\mathcal{Y}` are the classes of interest.
|
||||
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
|
||||
:param eps: smoothing factor. `rae` is not defined in cases in which the true distribution
|
||||
contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
|
||||
sample size. If `eps=None`, the sample size will be taken from the environment variable
|
||||
`SAMPLE_SIZE` (which has thus to be set beforehand).
|
||||
:return: relative absolute error
|
||||
"""
|
||||
eps = __check_eps(eps)
|
||||
p = smooth(p, eps)
|
||||
p_hat = smooth(p_hat, eps)
|
||||
return (abs(p-p_hat)/p).mean(axis=-1)
|
||||
prevs = smooth(prevs, eps)
|
||||
prevs_hat = smooth(prevs_hat, eps)
|
||||
return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
|
||||
|
||||
|
||||
def smooth(prevs, eps):
|
||||
""" Smooths a prevalence distribution with :math:`\epsilon` (`eps`) as:
|
||||
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
|
||||
""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
|
||||
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+
|
||||
\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
|
||||
|
||||
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
|
||||
:param eps: smoothing factor
|
||||
|
|
@ -200,12 +231,10 @@ def smooth(prevs, eps):
|
|||
|
||||
def __check_eps(eps=None):
|
||||
if eps is None:
|
||||
import quapy as qp
|
||||
sample_size = qp.environ['SAMPLE_SIZE']
|
||||
if sample_size is None:
|
||||
raise ValueError('eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set')
|
||||
else:
|
||||
eps = 1. / (2. * sample_size)
|
||||
eps = 1. / (2. * sample_size)
|
||||
return eps
|
||||
|
||||
|
||||
|
|
@ -217,7 +246,8 @@ CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
|||
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
||||
QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
|
||||
QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH}
|
||||
ERROR_NAMES = CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
|
||||
ERROR_NAMES = \
|
||||
CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES
|
||||
|
||||
f1_error = f1e
|
||||
acc_error = acce
|
||||
|
|
@ -225,4 +255,3 @@ mean_absolute_error = mae
|
|||
absolute_error = ae
|
||||
mean_relative_absolute_error = mrae
|
||||
relative_absolute_error = rae
|
||||
|
||||
|
|
|
|||
|
|
@ -444,24 +444,28 @@ class EMQ(AggregativeProbabilisticQuantifier):
|
|||
|
||||
def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None):
|
||||
self.classifier = classifier
|
||||
self.non_calibrated = classifier
|
||||
self.exact_train_prev = exact_train_prev
|
||||
self.recalib = recalib
|
||||
|
||||
def fit(self, data: LabelledCollection, fit_classifier=True):
|
||||
if self.recalib is not None:
|
||||
if self.recalib == 'nbvs':
|
||||
self.classifier = NBVSCalibration(self.classifier)
|
||||
self.classifier = NBVSCalibration(self.non_calibrated)
|
||||
elif self.recalib == 'bcts':
|
||||
self.classifier = BCTSCalibration(self.classifier)
|
||||
self.classifier = BCTSCalibration(self.non_calibrated)
|
||||
elif self.recalib == 'ts':
|
||||
self.classifier = TSCalibration(self.classifier)
|
||||
self.classifier = TSCalibration(self.non_calibrated)
|
||||
elif self.recalib == 'vs':
|
||||
self.classifier = VSCalibration(self.classifier)
|
||||
self.classifier = VSCalibration(self.non_calibrated)
|
||||
elif self.recalib == 'platt':
|
||||
self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False)
|
||||
else:
|
||||
raise ValueError('invalid param argument for recalibration method; available ones are '
|
||||
'"nbvs", "bcts", "ts", and "vs".')
|
||||
self.recalib = None
|
||||
else:
|
||||
self.classifier = self.non_calibrated
|
||||
self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True)
|
||||
if self.exact_train_prev:
|
||||
self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
|
||||
|
|
@ -766,7 +770,9 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
|
|||
"""
|
||||
Trains the classifier (if requested) and generates the validation distributions out of the training data.
|
||||
The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
|
||||
channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
|
||||
channels (a channel is a description, in the form of a histogram, of a specific class -- there are as many channels
|
||||
as classes, although in the binary case one can use only one channel, since the other one is constrained),
|
||||
and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
|
||||
are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
|
||||
distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
|
||||
is the fraction of instances with a value in the `k`-th bin.
|
||||
|
|
@ -815,7 +821,7 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
|
|||
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
|
||||
|
||||
# solutions are bounded to those contained in the unit-simplex
|
||||
bounds = tuple((0, 1) for x in range(n_classes)) # values in [0,1]
|
||||
bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
|
||||
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
|
||||
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
|
||||
return r.x
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from torch.nn.functional import relu
|
|||
from quapy.protocol import UPP
|
||||
from quapy.method.aggregative import *
|
||||
from quapy.util import EarlyStop
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class QuaNetTrainer(BaseQuantifier):
|
||||
|
|
@ -28,7 +29,7 @@ class QuaNetTrainer(BaseQuantifier):
|
|||
>>>
|
||||
>>> # load the kindle dataset as text, and convert words to numerical indexes
|
||||
>>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
|
||||
>>> qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
|
||||
>>> qp.domains.preprocessing.index(dataset, min_df=5, inplace=True)
|
||||
>>>
|
||||
>>> # the text classifier is a CNN trained by NeuralClassifierTrainer
|
||||
>>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
|
||||
|
|
@ -263,15 +264,19 @@ class QuaNetTrainer(BaseQuantifier):
|
|||
f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}')
|
||||
|
||||
def get_params(self, deep=True):
|
||||
return {**self.classifier.get_params(), **self.quanet_params}
|
||||
classifier_params = self.classifier.get_params()
|
||||
classifier_params = {'classifier__'+k:v for k,v in classifier_params.items()}
|
||||
return {**classifier_params, **self.quanet_params}
|
||||
|
||||
def set_params(self, **parameters):
|
||||
learner_params = {}
|
||||
for key, val in parameters.items():
|
||||
if key in self.quanet_params:
|
||||
self.quanet_params[key] = val
|
||||
elif key.startswith('classifier__'):
|
||||
learner_params[key.replace('classifier__', '')] = val
|
||||
else:
|
||||
learner_params[key] = val
|
||||
raise ValueError('unknown parameter ', key)
|
||||
self.classifier.set_params(**learner_params)
|
||||
|
||||
def __check_params_colision(self, quanet_params, learner_params):
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class GridSearchQ(BaseQuantifier):
|
|||
|
||||
def _sout(self, msg):
|
||||
if self.verbose:
|
||||
print(f'[{self.__class__.__name__}]: {msg}')
|
||||
print(f'[{self.__class__.__name__}:{self.model.__class__.__name__}]: {msg}')
|
||||
|
||||
def __check_error(self, error):
|
||||
if error in qp.error.QUANTIFICATION_ERROR:
|
||||
|
|
|
|||
|
|
@ -383,6 +383,9 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
|
|||
# x_error function) and 'y' is the estim-test shift (computed according to y_error)
|
||||
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
|
||||
|
||||
if method_order is None:
|
||||
method_order = method_names
|
||||
|
||||
if binning == 'isomerous':
|
||||
# take bins containing the same amount of examples
|
||||
tr_test_drifts = np.concatenate([data[m]['x'] for m in method_order])
|
||||
|
|
|
|||
4
setup.py
4
setup.py
|
|
@ -89,8 +89,6 @@ setup(
|
|||
'License :: OSI Approved :: BSD License',
|
||||
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Programming Language :: Python :: 3 :: Only',
|
||||
|
|
@ -113,7 +111,7 @@ setup(
|
|||
#
|
||||
packages=find_packages(include=['quapy', 'quapy.*']), # Required
|
||||
|
||||
python_requires='>=3.6, <4',
|
||||
python_requires='>=3.8, <4',
|
||||
|
||||
install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib', 'joblib', 'xlrd', 'abstention'],
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue