QuaPy/docs/build/html/quapy.data.html



<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />

    <title>quapy.data package &#8212; QuaPy 0.1.7 documentation</title>
    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
    <link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
    
    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
    <script src="_static/jquery.js"></script>
    <script src="_static/underscore.js"></script>
    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
    <script src="_static/doctools.js"></script>
    <script src="_static/sphinx_highlight.js"></script>
    <script src="_static/bizstyle.js"></script>
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="quapy.method package" href="quapy.method.html" />
    <link rel="prev" title="quapy.classification package" href="quapy.classification.html" />
    <meta name="viewport" content="width=device-width,initial-scale=1.0" />
    <!--[if lt IE 9]>
    <script src="_static/css3-mediaqueries.js"></script>
    <![endif]-->
  </head><body>
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="genindex.html" title="General Index"
             accesskey="I">index</a></li>
        <li class="right" >
          <a href="py-modindex.html" title="Python Module Index"
             >modules</a> |</li>
        <li class="right" >
          <a href="quapy.method.html" title="quapy.method package"
             accesskey="N">next</a> |</li>
        <li class="right" >
          <a href="quapy.classification.html" title="quapy.classification package"
             accesskey="P">previous</a> |</li>
        <li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> &#187;</li>
          <li class="nav-item nav-item-1"><a href="modules.html" >quapy</a> &#187;</li>
          <li class="nav-item nav-item-2"><a href="quapy.html" accesskey="U">quapy package</a> &#187;</li>
        <li class="nav-item nav-item-this"><a href="">quapy.data package</a></li> 
      </ul>
    </div>  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body" role="main">
            
  <section id="quapy-data-package">
<h1>quapy.data package<a class="headerlink" href="#quapy-data-package" title="Permalink to this heading">¶</a></h1>
<section id="submodules">
<h2>Submodules<a class="headerlink" href="#submodules" title="Permalink to this heading">¶</a></h2>
</section>
<section id="module-quapy.data.base">
<span id="quapy-data-base"></span><h2>quapy.data.base<a class="headerlink" href="#module-quapy.data.base" title="Permalink to this heading">¶</a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.data.base.Dataset">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">Dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">training</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocabulary</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Abstraction of training and test <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> objects.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>training</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
<li><p><strong>test</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
<li><p><strong>vocabulary</strong> – if indicated, is a dictionary of the terms used in this textual dataset</p></li>
<li><p><strong>name</strong> – a string representing the name of the dataset</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.SplitStratified">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">SplitStratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">collection</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">train_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.SplitStratified" title="Permalink to this definition">¶</a></dt>
<dd><p>Generates a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> from a stratified split of a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance.
See <a class="reference internal" href="#quapy.data.base.LabelledCollection.split_stratified" title="quapy.data.base.LabelledCollection.split_stratified"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.split_stratified()</span></code></a></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>collection</strong> – <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p></li>
<li><p><strong>train_size</strong> – the proportion of training documents (the rest forms the test split)</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.binary">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.Dataset.binary" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns True if the training collection is labelled according to two classes</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>boolean</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.classes_">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.data.base.Dataset.classes_" title="Permalink to this definition">¶</a></dt>
<dd><p>The classes according to which the training collection is labelled</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The classes according to which the training collection is labelled</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.kFCV">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.kFCV" title="Permalink to this definition">¶</a></dt>
<dd><p>Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around
<a class="reference internal" href="#quapy.data.base.LabelledCollection.kFCV" title="quapy.data.base.LabelledCollection.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.kFCV()</span></code></a> that returns <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instances made of training and test folds.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation as instances of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.load">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.load" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a training and a test labelled set of data and converts them into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instance.
The function in charge of reading the instances must be specified. This function can be a custom one, or any of
the reading functions defined in <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>train_path</strong> – string, the path to the file containing the training instances</p></li>
<li><p><strong>test_path</strong> – string, the path to the file containing the test instances</p></li>
<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
labels</p></li>
<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances.
See <a class="reference internal" href="#quapy.data.base.LabelledCollection.load" title="quapy.data.base.LabelledCollection.load"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.load()</span></code></a> for further details.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.n_classes">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.Dataset.n_classes" title="Permalink to this definition">¶</a></dt>
<dd><p>The number of classes according to which the training collection is labelled</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.stats">
<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.stats" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns (and optionally prints) a dictionary with some stats of this dataset, e.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">Dataset</span><span class="o">=</span><span class="n">kindle</span> <span class="c1">#tr-instances=3821, #te-instances=21591, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a dictionary containing some stats of this collection for the training and test collections. The keys
are <cite>train</cite> and <cite>test</cite>, and point to dedicated dictionaries of stats, for each collection, with keys
<cite>#instances</cite> (the number of instances), <cite>type</cite> (the type representing the instances),
<cite>#features</cite> (the number of features, if the instances are in array-like format), <cite>#classes</cite> (the classes of
the collection), <cite>prevs</cite> (the prevalence values for each class)</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.train_test">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">train_test</span></span><a class="headerlink" href="#quapy.data.base.Dataset.train_test" title="Permalink to this definition">¶</a></dt>
<dd><p>Alias to <cite>self.training</cite> and <cite>self.test</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>the training and test collections</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.Dataset.vocabulary_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.data.base.Dataset.vocabulary_size" title="Permalink to this definition">¶</a></dt>
<dd><p>If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>

</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">LabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes_</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>A LabelledCollection is a set of objects, each with a label associated with it. This class implements many sampling
routines.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>instances</strong> – array-like (np.ndarray, list, or csr_matrix are supported)</p></li>
<li><p><strong>labels</strong> – array-like with the same length as instances</p></li>
<li><p><strong>classes_</strong> – optional, list of classes from which labels are taken. If not specified, the classes are inferred
from the labels. The classes must be indicated in cases in which some of the labels might have no examples
(i.e., a prevalence of 0)</p></li>
</ul>
</dd>
</dl>
<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.X">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">X</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.X" title="Permalink to this definition">¶</a></dt>
<dd><p>An alias to self.instances</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>self.instances</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xp">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xp</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xp" title="Permalink to this definition">¶</a></dt>
<dd><p>Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from
a <cite>LabelledCollection</cite> object.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>a tuple <cite>(instances, prevalence)</cite> from this collection</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xy">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xy</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xy" title="Permalink to this definition">¶</a></dt>
<dd><p>Gets the instances and labels. This is useful when working with <cite>sklearn</cite> estimators, e.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">svm</span> <span class="o">=</span> <span class="n">LinearSVC</span><span class="p">()</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">my_collection</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>a tuple <cite>(instances, labels)</cite> from this collection</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.binary">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.binary" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns True if the number of classes is 2</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>boolean</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.counts">
<span class="sig-name descname"><span class="pre">counts</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.counts" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the number of instances for each of the classes in the codeframe.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the number of instances of each class, in the same order
as listed by <cite>self.classes_</cite></p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.kFCV">
<span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.kFCV" title="Permalink to this definition">¶</a></dt>
<dd><p>Generator of stratified folds to be used in k-fold cross validation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.load">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.load" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a labelled set of data and converts it into a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance. The function in charge
of reading the instances must be specified. This function can be a custom one, or any of the reading functions
defined in <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – string, the path to the file containing the labelled instances</p></li>
<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
labels</p></li>
<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances, i.e.,
these arguments are used to call <cite>loader_func(path, **loader_kwargs)</cite></p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> object</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.n_classes">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.n_classes" title="Permalink to this definition">¶</a></dt>
<dd><p>The number of classes</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.p">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">p</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.p" title="Permalink to this definition">¶</a></dt>
<dd><p>An alias to self.prevalence()</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>self.prevalence()</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.prevalence">
<span class="sig-name descname"><span class="pre">prevalence</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.prevalence" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the prevalence, or relative frequency, of the classes in the codeframe.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the relative frequencies of each class, in the same order
as listed by <cite>self.classes_</cite></p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling">
<span class="sig-name descname"><span class="pre">sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling" title="Permalink to this definition">¶</a></dt>
<dd><p>Return a random sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size and desired prevalence
values. For each class, the sampling is drawn with replacement if the requested prevalence is larger than
the actual prevalence of the class, or without replacement otherwise.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>size</strong> – integer, the requested size</p></li>
<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be left empty since
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
<cite>self.classes_</cite>) can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite> and prevalence close to <cite>prevs</cite> (or
prevalence == <cite>prevs</cite> if the exact prevalence values can be met as proportions of instances)</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_from_index">
<span class="sig-name descname"><span class="pre">sampling_from_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">index</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_from_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> whose elements are sampled from this collection using the
index.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>index</strong> – np.ndarray</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_index">
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
prevalence values are not specified, then returns the index of a uniform sampling.
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
the actual prevalence of the class, or without replacement otherwise.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>size</strong> – integer, the requested size</p></li>
<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be left empty since
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
<cite>self.classes_</cite>) can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_random">
<span class="sig-name descname"><span class="pre">split_random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_random" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split randomly from this collection, at desired
proportion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
as the training collection). The rest of the elements are included in the right-most returned collection
(typically used as a test collection).</p></li>
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
second one with <cite>1-train_prop</cite> elements</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_stratified">
<span class="sig-name descname"><span class="pre">split_stratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_stratified" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split with stratification from this collection, at desired
proportion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
as the training collection). The rest of the elements are included in the right-most returned collection
(typically used as a test collection).</p></li>
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
second one with <cite>1-train_prop</cite> elements</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.stats">
<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.stats" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns (and optionally prints) a dictionary with some stats of this collection. E.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1">#instances=3821, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a dictionary containing some stats of this collection. Keys include <cite>#instances</cite> (the number of
instances), <cite>type</cite> (the type representing the instances), <cite>#features</cite> (the number of features, if the
instances are in array-like format), <cite>#classes</cite> (the classes of the collection), <cite>prevs</cite> (the prevalence
values for each class)</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
<span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement
otherwise.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>size</strong> – integer, the requested size</p></li>
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the sampling.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite></p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
<span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
with replacement if the requested size is greater than the number of instances, or without replacement
otherwise.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>size</strong> – integer, the size of the uniform sample</p></li>
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the sampling.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
</dd>
</dl>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.y">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">y</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.y" title="Permalink to this definition">¶</a></dt>
<dd><p>An alias to self.labels</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>self.labels</p>
</dd>
</dl>
</dd></dl>

</dd></dl>

</section>
<section id="module-quapy.data.datasets">
<span id="quapy-data-datasets"></span><h2>quapy.data.datasets<a class="headerlink" href="#module-quapy.data.datasets" title="Permalink to this heading">¶</a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIDataset">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_UCIDataset" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a UCI dataset as an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>, as used in
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
Information Fusion, 34, 87-100.</a>
and
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).
Dynamic ensemble selection for quantification tasks.
Information Fusion, 45, 1-15.</a>.
The datasets do not come with a predefined train-test split (see <a class="reference internal" href="#quapy.data.datasets.fetch_UCILabelledCollection" title="quapy.data.datasets.fetch_UCILabelledCollection"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fetch_UCILabelledCollection()</span></code></a> for further
information on how to use these collections), and so a train-test split is generated at desired proportion.
The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)</p></li>
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest forms the training set</p></li>
<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCILabelledCollection">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCILabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_UCILabelledCollection" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a UCI collection as an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, as used in
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
Information Fusion, 34, 87-100.</a>
and
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).
Dynamic ensemble selection for quantification tasks.
Information Fusion, 45, 1-15.</a>.
The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation
protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.
This can be reproduced by using <a class="reference internal" href="#quapy.data.base.Dataset.kFCV" title="quapy.data.base.Dataset.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.data.base.Dataset.kFCV()</span></code></a>, e.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">collection</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCILabelledCollection</span><span class="p">(</span><span class="s2">&quot;yeast&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">collection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">2</span><span class="p">):</span>
<span class="gp">&gt;&gt;&gt; </span>    <span class="o">...</span>
</pre></div>
</div>
<p>The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)</p></li>
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest forms the training set</p></li>
<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> instance</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_lequa2022">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_lequa2022</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.datasets.fetch_lequa2022" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads the official datasets provided for the <a class="reference external" href="https://lequa2022.github.io/index">LeQua</a> competition.
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification
problems consisting of estimating the class prevalence values of 28 different merchandise products.
We refer to <a class="reference external" href="https://ceur-ws.org/Vol-3180/paper-146.pdf">Esuli, A., Moreo, A., Sebastiani, F., &amp; Sperduti, G. (2022).
A Detailed Overview of LeQua&#64; CLEF 2022: Learning to Quantify.</a> for a detailed description
of the tasks and datasets.</p>
<p>The datasets are downloaded only once, and stored for fast reuse.</p>
<p>See <cite>lequa2022_experiments.py</cite> provided in the example folder, that can serve as a guide on how to use these
datasets.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>task</strong> – a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</p></li>
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a tuple <cite>(train, val_gen, test_gen)</cite> where <cite>train</cite> is an instance of
<a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, <cite>val_gen</cite> and <cite>test_gen</cite> are instances of
<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.protocol.SamplesFromDir</span></code>, i.e., are sampling protocols that return a series of samples
labelled by prevalence.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_reviews">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_reviews</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tfidf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_reviews" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a Reviews dataset as a Dataset instance, as used in
<a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/3269206.3269287">Esuli, A., Moreo, A., and Sebastiani, F. “A recurrent neural network for sentiment quantification.”
Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.</a>.
The list of valid dataset names can be accessed in <cite>quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘hp’, ‘kindle’, ‘imdb’</p></li>
<li><p><strong>tfidf</strong> – set to True to transform the raw documents into tfidf weighted matrices</p></li>
<li><p><strong>min_df</strong> – minimum number of documents that should contain a term in order for the term to be
kept (ignored if tfidf==False)</p></li>
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)</p></li>
<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
faster subsequent invocations</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_twitter">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_twitter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">for_model_selection</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_twitter" title="Permalink to this definition">¶</a></dt>
<dd><p>Loads a Twitter dataset as a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance, as used in:
<a class="reference external" href="https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf">Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
Social Network Analysis and Mining 6(19), 1–22 (2016)</a>
Note that the datasets ‘semeval13’, ‘semeval14’, ‘semeval15’ share the same training set.
The list of valid dataset names corresponding to training sets can be accessed in
<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN</cite>, while the test sets can be accessed in
<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘gasp’, ‘hcr’, ‘omd’, ‘sanders’, ‘semeval13’,
‘semeval14’, ‘semeval15’, ‘semeval16’, ‘sst’, ‘wa’, ‘wb’</p></li>
<li><p><strong>for_model_selection</strong> – if True, then returns the train split as the training set and the devel split
as the test set; if False, then returns the train+devel split as the training set and the test set as the
test set</p></li>
<li><p><strong>min_df</strong> – minimum number of documents that should contain a term in order for the term to be kept</p></li>
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
~/quapy_data/ directory)</p></li>
<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
faster subsequent invocations</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.datasets.warn">
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">warn</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.datasets.warn" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</section>
<section id="module-quapy.data.preprocessing">
<span id="quapy-data-preprocessing"></span><h2>quapy.data.preprocessing<a class="headerlink" href="#module-quapy.data.preprocessing" title="Permalink to this heading">¶</a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">IndexTransformer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>This class implements an sklearn-style transformer that indexes text as numerical ids for the tokens it
contains, as they would be generated by sklearn’s
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>kwargs</strong> – keyword arguments from
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a></p>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.add_word">
<span class="sig-name descname"><span class="pre">add_word</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">word</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">id</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nogaps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.add_word" title="Permalink to this definition">¶</a></dt>
<dd><p>Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.
Useful to define special tokens for codifying unknown words, or padding tokens.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>word</strong> – string, surface form of the token</p></li>
<li><p><strong>id</strong> – integer, numerical value to assign to the token (leave as None for indicating the next valid id,
default)</p></li>
<li><p><strong>nogaps</strong> – if set to True (default) asserts that the id indicated leads to no numerical gaps with
preceding ids stored so far</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>integer, the numerical id for the new token</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit" title="Permalink to this definition">¶</a></dt>
<dd><p>Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>X</strong> – a list of strings</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>self</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit_transform">
<span class="sig-name descname"><span class="pre">fit_transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit_transform" title="Permalink to this definition">¶</a></dt>
<dd><p>Fits the transformer on <cite>X</cite> and transforms it.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>X</strong> – a list of strings</p></li>
<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.transform" title="Permalink to this definition">¶</a></dt>
<dd><p>Transforms the strings in <cite>X</cite> as lists of numerical ids</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>X</strong> – a list of strings</p></li>
<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.vocabulary_size">
<span class="sig-name descname"><span class="pre">vocabulary_size</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.vocabulary_size" title="Permalink to this definition">¶</a></dt>
<dd><p>Gets the length of the vocabulary according to which the document tokens have been indexed</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>

</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.preprocessing.index">
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.index" title="Permalink to this definition">¶</a></dt>
<dd><p>Indexes the tokens of a textual <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of string documents.
To index a document means to replace each different token by a unique numerical index.
Rare words (i.e., words occurring less than <cite>min_df</cite> times) are replaced by a special token <cite>UNK</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object where the instances of training and test documents
are lists of str</p></li>
<li><p><strong>min_df</strong> – minimum number of occurrences below which the term is replaced by a <cite>UNK</cite> index</p></li>
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a>)</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) consisting of lists of integer values representing indices.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.preprocessing.reduce_columns">
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">reduce_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.reduce_columns" title="Permalink to this definition">¶</a></dt>
<dd><p>Reduces the dimensionality of the instances, represented as a <cite>csr_matrix</cite> (or any subtype of
<cite>scipy.sparse.spmatrix</cite>), of training and test documents by removing the columns of words which are not present
in at least <cite>min_df</cite> instances in the training set</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in which instances are represented in sparse format (any
subtype of scipy.sparse.spmatrix)</p></li>
<li><p><strong>min_df</strong> – integer, minimum number of instances below which the columns are removed</p></li>
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) where the dimensions corresponding to infrequent terms
in the training set have been removed</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.preprocessing.standardize">
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">standardize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.standardize" title="Permalink to this definition">¶</a></dt>
<dd><p>Standardizes the real-valued columns of a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>.
Standardization, aka z-scoring, of a variable <cite>X</cite> comes down to subtracting the average and normalizing by the
standard deviation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object</p></li>
<li><p><strong>inplace</strong> – set to True if the transformation is to be applied inplace, or to False (default) if a new
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> is to be returned</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a></p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.preprocessing.text2tfidf">
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">text2tfidf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sublinear_tf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.text2tfidf" title="Permalink to this definition">¶</a></dt>
<dd><p>Transforms a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of textual instances into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of
tfidf weighted sparse vectors</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> where the instances of training and test collections are
lists of str</p></li>
<li><p><strong>min_df</strong> – minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</p></li>
<li><p><strong>sublinear_tf</strong> – whether or not to apply the log scaling to the tf counters (default True)</p></li>
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">TfidfVectorizer</a>)</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in <cite>csr_matrix</cite> format (if inplace=False) or a reference to the
current Dataset (if inplace=True) where the instances are stored in a <cite>csr_matrix</cite> of real-valued tfidf scores</p>
</dd>
</dl>
</dd></dl>

</section>
<section id="module-quapy.data.reader">
<span id="quapy-data-reader"></span><h2>quapy.data.reader<a class="headerlink" href="#module-quapy.data.reader" title="Permalink to this heading">¶</a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.reader.binarize">
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">binarize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pos_class</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.binarize" title="Permalink to this definition">¶</a></dt>
<dd><p>Binarizes a categorical array-like collection of labels towards the positive class <cite>pos_class</cite>. E.g.,:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">binarize</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="go">array([0, 1, 0, 0, 0, 0])</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>y</strong> – array-like of labels</p></li>
<li><p><strong>pos_class</strong> – integer, the positive class</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a binary np.ndarray, in which the value 1 corresponds to positions in which <cite>y</cite> had <cite>pos_class</cite> labels, and
0 otherwise</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.reader.from_csv">
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_csv" title="Permalink to this definition">¶</a></dt>
<dd><p>Reads a csv file in which columns are separated by ‘,’.
File format &lt;label&gt;,&lt;feat1&gt;,&lt;feat2&gt;,…,&lt;featn&gt;</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – path to the csv file</p></li>
<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a np.ndarray for the labels and a ndarray (float) for the covariates</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.reader.from_sparse">
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_sparse</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_sparse" title="Permalink to this definition">¶</a></dt>
<dd><p>Reads a labelled collection of real-valued instances expressed in sparse format
File format &lt;-1 or 0 or 1&gt;[s col(int):val(float)]</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>path</strong> – path to the labelled collection</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <cite>csr_matrix</cite> containing the instances (rows), and a ndarray containing the labels</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.reader.from_text">
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_text</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">class2int</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_text" title="Permalink to this definition">¶</a></dt>
<dd><p>Reads a labelled collection of documents.
File format &lt;0 or 1&gt;        &lt;document&gt;</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – path to the labelled collection</p></li>
<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
<li><p><strong>verbose</strong> – if &gt;0 (default) shows some progress information in standard output</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a list of sentences, and a list of labels</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="quapy.data.reader.reindex_labels">
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">reindex_labels</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.reindex_labels" title="Permalink to this definition">¶</a></dt>
<dd><p>Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.
E.g.:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">reindex_labels</span><span class="p">([</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]),</span> <span class="n">array</span><span class="p">([</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;&lt;U1&#39;</span><span class="p">))</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>y</strong> – the list or array of original labels</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an ndarray (int) of class indexes, and an ndarray of classnames corresponding to the indexes.</p>
</dd>
</dl>
</dd></dl>

</section>
<section id="module-quapy.data">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy.data" title="Permalink to this heading">¶</a></h2>
</section>
</section>


            <div class="clearer"></div>
          </div>
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
  <div>
    <h3><a href="index.html">Table of Contents</a></h3>
    <ul>
<li><a class="reference internal" href="#">quapy.data package</a><ul>
<li><a class="reference internal" href="#submodules">Submodules</a></li>
<li><a class="reference internal" href="#module-quapy.data.base">quapy.data.base</a></li>
<li><a class="reference internal" href="#module-quapy.data.datasets">quapy.data.datasets</a></li>
<li><a class="reference internal" href="#module-quapy.data.preprocessing">quapy.data.preprocessing</a></li>
<li><a class="reference internal" href="#module-quapy.data.reader">quapy.data.reader</a></li>
<li><a class="reference internal" href="#module-quapy.data">Module contents</a></li>
</ul>
</li>
</ul>

  </div>
  <div>
    <h4>Previous topic</h4>
    <p class="topless"><a href="quapy.classification.html"
                          title="previous chapter">quapy.classification package</a></p>
  </div>
  <div>
    <h4>Next topic</h4>
    <p class="topless"><a href="quapy.method.html"
                          title="next chapter">quapy.method package</a></p>
  </div>
  <div role="note" aria-label="source link">
    <h3>This Page</h3>
    <ul class="this-page-menu">
      <li><a href="_sources/quapy.data.rst.txt"
            rel="nofollow">Show Source</a></li>
    </ul>
   </div>
<div id="searchbox" style="display: none" role="search">
  <h3 id="searchlabel">Quick search</h3>
    <div class="searchformwrapper">
    <form class="search" action="search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
      <input type="submit" value="Go" />
    </form>
    </div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="genindex.html" title="General Index"
             >index</a></li>
        <li class="right" >
          <a href="py-modindex.html" title="Python Module Index"
             >modules</a> |</li>
        <li class="right" >
          <a href="quapy.method.html" title="quapy.method package"
             >next</a> |</li>
        <li class="right" >
          <a href="quapy.classification.html" title="quapy.classification package"
             >previous</a> |</li>
        <li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> &#187;</li>
          <li class="nav-item nav-item-1"><a href="modules.html" >quapy</a> &#187;</li>
          <li class="nav-item nav-item-2"><a href="quapy.html" >quapy package</a> &#187;</li>
        <li class="nav-item nav-item-this"><a href="">quapy.data package</a></li> 
      </ul>
    </div>
    <div class="footer" role="contentinfo">
        &#169; Copyright 2021, Alejandro Moreo.
      Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
    </div>
  </body>
</html>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<!doctype html>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<html lang="en">
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								  <head>
 								    <meta charset="utf-8" />
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								    <title>quapy.data package &#8212; QuaPy 0.1.7 documentation</title>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
 								    <link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
 								    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
 								    <script src="_static/jquery.js"></script>
 								    <script src="_static/underscore.js"></script>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								    <script src="_static/doctools.js"></script>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								    <script src="_static/sphinx_highlight.js"></script>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								    <script src="_static/bizstyle.js"></script>
 								    <link rel="index" title="Index" href="genindex.html" />
 								    <link rel="search" title="Search" href="search.html" />
 								    <link rel="next" title="quapy.method package" href="quapy.method.html" />
 								    <link rel="prev" title="quapy.classification package" href="quapy.classification.html" />
 								    <meta name="viewport" content="width=device-width,initial-scale=1.0" />
 								    <!--[if lt IE 9]>
 								    <script src="_static/css3-mediaqueries.js"></script>
 								    <![endif]-->
 								  </head><body>
 								    <div class="related" role="navigation" aria-label="related navigation">
 								      <h3>Navigation</h3>
 								      <ul>
 								        <li class="right" style="margin-right: 10px">
 								          <a href="genindex.html" title="General Index"
 								             accesskey="I">index</a></li>
 								        <li class="right" >
 								          <a href="py-modindex.html" title="Python Module Index"
 								             >modules</a> |</li>
 								        <li class="right" >
 								          <a href="quapy.method.html" title="quapy.method package"
 								             accesskey="N">next</a> |</li>
 								        <li class="right" >
 								          <a href="quapy.classification.html" title="quapy.classification package"
 								             accesskey="P">previous</a> |</li>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								        <li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> &#187;</li>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								          <li class="nav-item nav-item-1"><a href="modules.html" >quapy</a> &#187;</li>
 								          <li class="nav-item nav-item-2"><a href="quapy.html" accesskey="U">quapy package</a> &#187;</li>
 								        <li class="nav-item nav-item-this"><a href="">quapy.data package</a></li>
 								      </ul>
 								    </div>
 								    <div class="document">
 								      <div class="documentwrapper">
 								        <div class="bodywrapper">
 								          <div class="body" role="main">
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								  <section id="quapy-data-package">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<h1>quapy.data package<a class="headerlink" href="#quapy-data-package" title="Permalink to this heading">¶</a></h1>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								<section id="submodules">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<h2>Submodules<a class="headerlink" href="#submodules" title="Permalink to this heading">¶</a></h2>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								<section id="module-quapy.data.base">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span id="quapy-data-base"></span><h2>quapy.data.base<a class="headerlink" href="#module-quapy.data.base" title="Permalink to this heading">¶</a></h2>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py class">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">Dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">training</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocabulary</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset" title="Permalink to this definition">¶</a></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<p>Abstraction of training and test <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> objects.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>training</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
 								<li><p><strong>test</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
 								<li><p><strong>vocabulary</strong> – if indicated, is a dictionary of the terms used in this textual dataset</p></li>
 								<li><p><strong>name</strong> – a string representing the name of the dataset</p></li>
 								</ul>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.SplitStratified">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">SplitStratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">collection</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">train_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.SplitStratified" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Generates a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> from a stratified split of a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance.
 								See <a class="reference internal" href="#quapy.data.base.LabelledCollection.split_stratified" title="quapy.data.base.LabelledCollection.split_stratified"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.split_stratified()</span></code></a></p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>collection</strong> – <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p></li>
 								<li><p><strong>train_size</strong> – the proportion of training documents (the rest forms the test split)</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.binary">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.Dataset.binary" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns True if the training collection is labelled according to two classes</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>boolean</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.classes_">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.data.base.Dataset.classes_" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>The classes according to which the training collection is labelled</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>The classes according to which the training collection is labelled</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.kFCV">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.kFCV" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around
 								<a class="reference internal" href="#quapy.data.base.LabelledCollection.kFCV" title="quapy.data.base.LabelledCollection.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.kFCV()</span></code></a> that returns <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instances made of training and test folds.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
 								<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
 								<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation as instances of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.load">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.load" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a training and a test labelled set of data and convert it into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instance.
 								The function in charge of reading the instances must be specified. This function can be a custom one, or any of
 								the reading functions defined in <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>train_path</strong> – string, the path to the file containing the training instances</p></li>
 								<li><p><strong>test_path</strong> – string, the path to the file containing the test instances</p></li>
 								<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
 								labels</p></li>
 								<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
 								<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances.
 								See <a class="reference internal" href="#quapy.data.base.LabelledCollection.load" title="quapy.data.base.LabelledCollection.load"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.load()</span></code></a> for further details.</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.n_classes">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.Dataset.n_classes" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>The number of classes according to which the training collection is labelled</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>integer</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.stats">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.Dataset.stats" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns (and optionally prints) a dictionary with some stats of this dataset. E.g.,:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="n">Dataset</span><span class="o">=</span><span class="n">kindle</span> <span class="c1">#tr-instances=3821, #te-instances=21591, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
 								</pre></div>
 								</div>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a dictionary containing some stats of this collection for the training and test collections. The keys
 								are <cite>train</cite> and <cite>test</cite>, and point to dedicated dictionaries of stats, for each collection, with keys
 								<cite>#instances</cite> (the number of instances), <cite>type</cite> (the type representing the instances),
 								<cite>#features</cite> (the number of features, if the instances are in array-like format), <cite>#classes</cite> (the classes of
 								the collection), <cite>prevs</cite> (the prevalence values for each class)</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.train_test">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">train_test</span></span><a class="headerlink" href="#quapy.data.base.Dataset.train_test" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Alias to <cite>self.training</cite> and <cite>self.test</cite></p>
 								<dl class="field-list simple">
 								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>the training and test collections</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.Dataset.vocabulary_size">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.data.base.Dataset.vocabulary_size" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>integer</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								</dd></dl>
 								<dl class="py class">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">LabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes_</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection" title="Permalink to this definition">¶</a></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<p>A LabelledCollection is a set of objects, each with a label associated with it. This class implements many sampling
 								routines.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>instances</strong> – array-like (np.ndarray, list, or csr_matrix are supported)</p></li>
 								<li><p><strong>labels</strong> – array-like with the same length of instances</p></li>
 								<li><p><strong>classes_</strong> – optional, list of classes from which labels are taken. If not specified, the classes are inferred
 								from the labels. The classes must be indicated in cases in which some of the labels might have no examples
 								(i.e., a prevalence of 0)</p></li>
 								</ul>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py property">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.X">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">X</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.X" title="Permalink to this definition">¶</a></dt>
 								<dd><p>An alias to self.instances</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>self.instances</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xp">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xp</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xp" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from
 								a <cite>LabelledCollection</cite> object.</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>a tuple <cite>(instances, prevalence)</cite> from this collection</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xy">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xy</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xy" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Gets the instances and labels. This is useful when working with <cite>sklearn</cite> estimators, e.g.:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">svm</span> <span class="o">=</span> <span class="n">LinearSVC</span><span class="p">()</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">my_collection</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
 								</pre></div>
 								</div>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>a tuple <cite>(instances, labels)</cite> from this collection</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.binary">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.binary" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns True if the number of classes is 2</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>boolean</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.counts">
 								<span class="sig-name descname"><span class="pre">counts</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.counts" title="Permalink to this definition">¶</a></dt>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dd><p>Returns the number of instances for each of the classes in the codeframe.</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the number of instances of each class, in the same order
 								as listed by <cite>self.classes_</cite></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.kFCV">
 								<span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.kFCV" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Generator of stratified folds to be used in k-fold cross validation.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
 								<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
 								<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.load">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.load" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a labelled set of data and converts it into a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance. The function in charge
 								of reading the instances must be specified. This function can be a custom one, or any of the reading functions
 								defined in the <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>path</strong> – string, the path to the file containing the labelled instances</p></li>
 								<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
 								labels</p></li>
 								<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
 								<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances, i.e.,
 								these arguments are used to call <cite>loader_func(path, **loader_kwargs)</cite></p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> object</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.n_classes">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.n_classes" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>The number of classes</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>integer</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.p">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">p</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.p" title="Permalink to this definition">¶</a></dt>
 								<dd><p>An alias to self.prevalence()</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>self.prevalence()</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.prevalence">
 								<span class="sig-name descname"><span class="pre">prevalence</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.prevalence" title="Permalink to this definition">¶</a></dt>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dd><p>Returns the prevalence, or relative frequency, of the classes in the codeframe.</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the relative frequencies of each class, in the same order
 								as listed by <cite>self.classes_</cite></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling">
 								<span class="sig-name descname"><span class="pre">sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Return a random sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size and desired prevalence
 								values. For each class, the sampling is drawn with replacement if the requested prevalence is larger than
 								the actual prevalence of the class, or without replacement otherwise.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>size</strong> – integer, the requested size</p></li>
 								<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be left empty since
 								it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
 								<cite>self.classes_</cite>) can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
 								<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite> and prevalence close to <cite>prevs</cite> (or
 								prevalence == <cite>prevs</cite> if the exact prevalence values can be met as proportions of instances)</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_from_index">
 								<span class="sig-name descname"><span class="pre">sampling_from_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">index</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_from_index" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> whose elements are sampled from this collection using the
 								index.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p><strong>index</strong> – np.ndarray</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_index">
 								<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
 								prevalence values are not specified, then returns the index of a uniform sampling.
 								For each class, the sampling is drawn with replacement if the requested prevalence is larger than
 								the actual prevalence of the class, or without replacement otherwise.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>size</strong> – integer, the requested size</p></li>
 								<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be left empty since
 								it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
 								<cite>self.classes_</cite>) can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
 								<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_random">
 								<span class="sig-name descname"><span class="pre">split_random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_random" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split randomly from this collection, at desired
 								proportion.</p>
 								<dl class="field-list simple">
 								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
 								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
 								as the training collection). The rest of elements are included in the right-most returned collection
 								(typically used as a test collection).</p></li>
 								<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
 								</ul>
 								</dd>
 								<dt class="field-even">Returns<span class="colon">:</span></dt>
 								<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
 								second one with <cite>1-train_prop</cite> elements</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_stratified">
 								<span class="sig-name descname"><span class="pre">split_stratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_stratified" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split with stratification from this collection, at desired
 								proportion.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
 								as the training collection). The rest of elements are included in the right-most returned collection
 								(typically used as a test collection).</p></li>
 								<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
 								second one with <cite>1-train_prop</cite> elements</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.stats">
 								<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.stats" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns (and optionally prints) a dictionary with some stats of this collection. E.g.:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="c1">#instances=3821, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
 								</pre></div>
 								</div>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a dictionary containing some stats of this collection. Keys include <cite>#instances</cite> (the number of
 								instances), <cite>type</cite> (the type representing the instances), <cite>#features</cite> (the number of features, if the
 								instances are in array-like format), <cite>#classes</cite> (the classes of the collection), <cite>prevs</cite> (the prevalence
 								values for each class)</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
 								with replacement if the requested size is greater than the number of instances, or without replacement
 								otherwise.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
 								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>size</strong> – integer, the requested size</p></li>
 								<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the sampling.</p></li>
 								</ul>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite></p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
 								with replacement if the requested size is greater than the number of instances, or without replacement
 								otherwise.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
 								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>size</strong> – integer, the size of the uniform sample</p></li>
 								<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the sampling.</p></li>
 								</ul>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py property">
 								<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.y">
 								<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">y</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.y" title="Permalink to this definition">¶</a></dt>
 								<dd><p>An alias to self.labels</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
 								<dd class="field-odd"><p>self.labels</p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								</dd></dl>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								<section id="module-quapy.data.datasets">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span id="quapy-data-datasets"></span><h2>quapy.data.datasets<a class="headerlink" href="#module-quapy.data.datasets" title="Permalink to this heading">¶</a></h2>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIDataset">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_UCIDataset" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a UCI dataset as an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>, as used in
 								<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).
 								Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
 								Information Fusion, 34, 87-100.</a>
 								and
 								<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).
 								Dynamic ensemble selection for quantification tasks.
 								Information Fusion, 45, 1-15.</a>.
 								The datasets do not come with a predefined train-test split (see <a class="reference internal" href="#quapy.data.datasets.fetch_UCILabelledCollection" title="quapy.data.datasets.fetch_UCILabelledCollection"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fetch_UCILabelledCollection()</span></code></a> for further
 								information on how to use these collections), and so a train-test split is generated at desired proportion.
 								The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset_name</strong> – a dataset name</p></li>
 								<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
 								~/quapy_data/ directory)</p></li>
 								<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest forms the training set</p></li>
 								<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCILabelledCollection">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCILabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_UCILabelledCollection" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a UCI collection as an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, as used in
 								<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).
 								Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
 								Information Fusion, 34, 87-100.</a>
 								and
 								<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).
 								Dynamic ensemble selection for quantification tasks.
 								Information Fusion, 45, 1-15.</a>.
 								The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation
 								protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.
 								This can be reproduced by using <a class="reference internal" href="#quapy.data.base.Dataset.kFCV" title="quapy.data.base.Dataset.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.data.base.Dataset.kFCV()</span></code></a>, e.g.:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="n">collection</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCILabelledCollection</span><span class="p">(</span><span class="s2">&quot;yeast&quot;</span><span class="p">)</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">collection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">2</span><span class="p">):</span>
 								<span class="gp">&gt;&gt;&gt; </span>    <span class="o">...</span>
 								</pre></div>
 								</div>
 								<p>The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset_name</strong> – a dataset name</p></li>
 								<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
 								~/quapy_data/ directory)</p></li>
 								<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest conforms the training set</p></li>
 								<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> instance</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.fetch_lequa2022">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_lequa2022</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.datasets.fetch_lequa2022" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Loads the official datasets provided for the <a class="reference external" href="https://lequa2022.github.io/index">LeQua</a> competition.
 								In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
 								problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
 								Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification
 								problems consisting of estimating the class prevalence values of 28 different merchandise products.
 								We refer to the <a class="reference external" href="https://ceur-ws.org/Vol-3180/paper-146.pdf">Esuli, A., Moreo, A., Sebastiani, F., &amp; Sperduti, G. (2022).
 								A Detailed Overview of LeQua&#64; CLEF 2022: Learning to Quantify.</a> for a detailed description
 								on the tasks and datasets.</p>
 								<p>The datasets are downloaded only once, and stored for fast reuse.</p>
 								<p>See <cite>lequa2022_experiments.py</cite> provided in the example folder, that can serve as a guide on how to use these
 								datasets.</p>
 								<dl class="field-list simple">
 								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
 								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>task</strong> – a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</p></li>
 								<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
 								~/quapy_data/ directory)</p></li>
 								</ul>
 								</dd>
 								<dt class="field-even">Returns<span class="colon">:</span></dt>
 								<dd class="field-even"><p>a tuple <cite>(train, val_gen, test_gen)</cite> where <cite>train</cite> is an instance of
 								<a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, <cite>val_gen</cite> and <cite>test_gen</cite> are instances of
 								<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.protocol.SamplesFromDir</span></code>, i.e., are sampling protocols that return a series of samples
 								labelled by prevalence.</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.fetch_reviews">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_reviews</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tfidf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_reviews" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a Reviews dataset as a Dataset instance, as used in
 								<a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/3269206.3269287">Esuli, A., Moreo, A., and Sebastiani, F. “A recurrent neural network for sentiment quantification.”
 								Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.</a>.
 								The list of valid dataset names can be accessed in <cite>quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS</cite></p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘hp’, ‘kindle’, ‘imdb’</p></li>
 								<li><p><strong>tfidf</strong> – set to True to transform the raw documents into tfidf weighted matrices</p></li>
 								<li><p><strong>min_df</strong> – minimum number of documents that should contain a term in order for the term to be
 								kept (ignored if tfidf==False)</p></li>
 								<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
 								~/quapy_data/ directory)</p></li>
 								<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
 								faster subsequent invocations</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.fetch_twitter">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_twitter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">for_model_selection</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="headerlink" href="#quapy.data.datasets.fetch_twitter" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Loads a Twitter dataset as a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance, as used in:
 								<a class="reference external" href="https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf">Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
 								Social Network Analysis and Mining 6(19), 1–22 (2016)</a>.
 								Note that the datasets ‘semeval13’, ‘semeval14’, ‘semeval15’ share the same training set.
 								The list of valid dataset names corresponding to training sets can be accessed in
 								<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN</cite>, while the test sets can be accessed in
 								<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST</cite></p>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘gasp’, ‘hcr’, ‘omd’, ‘sanders’, ‘semeval13’,
 								‘semeval14’, ‘semeval15’, ‘semeval16’, ‘sst’, ‘wa’, ‘wb’</p></li>
 								<li><p><strong>for_model_selection</strong> – if True, then returns the train split as the training set and the devel split
 								as the test set; if False, then returns the train+devel split as the training set and the test set as the
 								test set</p></li>
 								<li><p><strong>min_df</strong> – minimum number of documents that should contain a term in order for the term to be kept</p></li>
 								<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
 								~/quapy_data/ directory)</p></li>
 								<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
 								faster subsequent invocations</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.datasets.warn">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">warn</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.datasets.warn" title="Permalink to this definition">¶</a></dt>
 								<dd></dd></dl>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								<section id="module-quapy.data.preprocessing">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span id="quapy-data-preprocessing"></span><h2>quapy.data.preprocessing<a class="headerlink" href="#module-quapy.data.preprocessing" title="Permalink to this heading">¶</a></h2>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py class">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">IndexTransformer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer" title="Permalink to this definition">¶</a></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<p>This class implements an sklearn-style transformer that indexes text as numerical ids for the tokens it
 								contains, and that would be generated by sklearn’s
 								<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a></p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
 								<dd class="field-odd"><p><strong>kwargs</strong> – keyword arguments from
 								<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</p>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.add_word">
 								<span class="sig-name descname"><span class="pre">add_word</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">word</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">id</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nogaps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.add_word" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.
 								Useful to define special tokens for codifying unknown words, or padding tokens.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>word</strong> – string, surface form of the token</p></li>
 								<li><p><strong>id</strong> – integer, numerical value to assign to the token (leave as None for indicating the next valid id,
 								default)</p></li>
 								<li><p><strong>nogaps</strong> – if set to True (default) asserts that the id indicated leads to no numerical gaps with
 								preceding ids stored so far</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>integer, the numerical id for the new token</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit">
 								<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd class="field-odd"><p><strong>X</strong> – a list of strings</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd class="field-even"><p>self</p>
 								</dd>
 								</dl>
 								</dd></dl>
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit_transform">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-name descname"><span class="pre">fit_transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit_transform" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Fits the transformer on <cite>X</cite> and transforms it.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>X</strong> – a list of strings</p></li>
 								<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.transform">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.transform" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Transforms the strings in <cite>X</cite> as lists of numerical ids</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>X</strong> – a list of strings</p></li>
 								<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py method">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.vocabulary_size">
 								<span class="sig-name descname"><span class="pre">vocabulary_size</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.vocabulary_size" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Gets the length of the vocabulary according to which the document tokens have been indexed</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p>integer</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.index">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.index" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Indexes the tokens of a textual <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of string documents.
 								To index a document means to replace each different token by a unique numerical index.
 								Rare words (i.e., words occurring fewer than <cite>min_df</cite> times) are replaced by a special token <cite>UNK</cite>.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object where the instances of training and test documents
 								are lists of str</p></li>
 								<li><p><strong>min_df</strong> – minimum number of occurrences below which the term is replaced by a <cite>UNK</cite> index</p></li>
 								<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
 								<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a>)</p></li>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
 								<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
 								<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) consisting of lists of integer values representing indices.</p>
 								</dd>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.reduce_columns">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">reduce_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.reduce_columns" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Reduces the dimensionality of the instances, represented as a <cite>csr_matrix</cite> (or any subtype of
 								<cite>scipy.sparse.spmatrix</cite>), of training and test documents by removing the columns of words which are not present
 								in at least <cite>min_df</cite> instances in the training set</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in which instances are represented in sparse format (any
 								subtype of scipy.sparse.spmatrix)</p></li>
 								<li><p><strong>min_df</strong> – integer, minimum number of instances below which the columns are removed</p></li>
 								<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
 								<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) where the dimensions corresponding to infrequent terms
 								in the training set have been removed</p>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.standardize">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">standardize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.standardize" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Standardizes the real-valued columns of a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>.
 								Standardization, aka z-scoring, of a variable <cite>X</cite> comes down to subtracting the average and normalizing by the
 								standard deviation.</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object</p></li>
 								<li><p><strong>inplace</strong> – set to True if the transformation is to be applied inplace, or to False (default) if a new
 								<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> is to be returned</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
 								<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a></p>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.preprocessing.text2tfidf">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">text2tfidf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sublinear_tf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.preprocessing.text2tfidf" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Transforms a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of textual instances into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of
 								tfidf weighted sparse vectors</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> where the instances of training and test collections are
 								lists of str</p></li>
 								<li><p><strong>min_df</strong> – minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</p></li>
 								<li><p><strong>sublinear_tf</strong> – whether or not to apply the log scaling to the tf counters (default True)</p></li>
 								<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
 								<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
 								<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">TfidfVectorizer</a>)</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in <cite>csr_matrix</cite> format (if inplace=False) or a reference to the
 								current Dataset (if inplace=True) where the instances are stored in a <cite>csr_matrix</cite> of real-valued tfidf scores</p>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd></dl>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								<section id="module-quapy.data.reader">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span id="quapy-data-reader"></span><h2>quapy.data.reader<a class="headerlink" href="#module-quapy.data.reader" title="Permalink to this heading">¶</a></h2>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.reader.binarize">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">binarize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pos_class</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.binarize" title="Permalink to this definition">¶</a></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd><p>Binarizes a categorical array-like collection of labels towards the positive class <cite>pos_class</cite>. E.g.:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">binarize</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span>
 								</pre></div>
 								</div>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>y</strong> – array-like of labels</p></li>
 								<li><p><strong>pos_class</strong> – integer, the positive class</p></li>
 								</ul>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a binary np.ndarray, in which the value 1 corresponds to positions in which <cite>y</cite> had <cite>pos_class</cite> labels, and
 0 otherwise</p>
 								</dd>
 								</dl>
 								</dd></dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.reader.from_csv">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_csv" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Reads a csv file in which columns are separated by ‘,’.
 								File format &lt;label&gt;,&lt;feat1&gt;,&lt;feat2&gt;,…,&lt;featn&gt;</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>path</strong> – path to the csv file</p></li>
 								<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
 								</ul>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a np.ndarray for the labels and a ndarray (float) for the covariates</p>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.reader.from_sparse">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_sparse</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_sparse" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Reads a labelled collection of real-valued instances expressed in sparse format
 								File format &lt;-1 or 0 or 1&gt;[s col(int):val(float)]</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd class="field-odd"><p><strong>path</strong> – path to the labelled collection</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a <cite>csr_matrix</cite> containing the instances (rows), and a ndarray containing the labels</p>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd>
 								</dl>
 								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.reader.from_text">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_text</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">class2int</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.from_text" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Reads a labelled collection of documents.
 								File format &lt;0 or 1&gt;        &lt;document&gt;</p>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><ul class="simple">
 								<li><p><strong>path</strong> – path to the labelled collection</p></li>
 								<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
 								<li><p><strong>verbose</strong> – if &gt;0 (default) shows some progress information in standard output</p></li>
 								</ul>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<dd class="field-even"><p>a list of sentences, and a list of labels</p>
 								</dd>
 								</dl>
 								</dd></dl>
 								<dl class="py function">
 								<dt class="sig sig-object py" id="quapy.data.reader.reindex_labels">
 								<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">reindex_labels</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.reader.reindex_labels" title="Permalink to this definition">¶</a></dt>
 								<dd><p>Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								E.g.:</p>
 								<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">reindex_labels</span><span class="p">([</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
 								<span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]),</span> <span class="n">array</span><span class="p">([</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;&lt;U1&#39;</span><span class="p">))</span>
 								</pre></div>
 								</div>
 								<dl class="field-list simple">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-odd">Parameters<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-odd"><p><strong>y</strong> – the list or array of original labels</p>
 								</dd>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<dt class="field-even">Returns<span class="colon">:</span></dt>
-												updating the documentation

											
										
										
											2021-12-06 18:25:47 +01:00
+								<dd class="field-even"><p>a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.</p>
 								</dd>
 								</dl>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								</dd></dl>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								<section id="module-quapy.data">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy.data" title="Permalink to this heading">¶</a></h2>
-												adding documentation, adding brokenbar plots, merging plots from tweetsent with density

											
										
										
											2021-11-22 18:10:48 +01:00
+								</section>
 								</section>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
 								            <div class="clearer"></div>
 								          </div>
 								        </div>
 								      </div>
 								      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
 								        <div class="sphinxsidebarwrapper">
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								  <div>
 								    <h3><a href="index.html">Table of Contents</a></h3>
 								    <ul>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<li><a class="reference internal" href="#">quapy.data package</a><ul>
 								<li><a class="reference internal" href="#submodules">Submodules</a></li>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<li><a class="reference internal" href="#module-quapy.data.base">quapy.data.base</a></li>
 								<li><a class="reference internal" href="#module-quapy.data.datasets">quapy.data.datasets</a></li>
 								<li><a class="reference internal" href="#module-quapy.data.preprocessing">quapy.data.preprocessing</a></li>
 								<li><a class="reference internal" href="#module-quapy.data.reader">quapy.data.reader</a></li>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								<li><a class="reference internal" href="#module-quapy.data">Module contents</a></li>
 								</ul>
 								</li>
 								</ul>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								  </div>
 								  <div>
 								    <h4>Previous topic</h4>
 								    <p class="topless"><a href="quapy.classification.html"
 								                          title="previous chapter">quapy.classification package</a></p>
 								  </div>
 								  <div>
 								    <h4>Next topic</h4>
 								    <p class="topless"><a href="quapy.method.html"
 								                          title="next chapter">quapy.method package</a></p>
 								  </div>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								  <div role="note" aria-label="source link">
 								    <h3>This Page</h3>
 								    <ul class="this-page-menu">
 								      <li><a href="_sources/quapy.data.rst.txt"
 								            rel="nofollow">Show Source</a></li>
 								    </ul>
 								   </div>
 								<div id="searchbox" style="display: none" role="search">
 								  <h3 id="searchlabel">Quick search</h3>
 								    <div class="searchformwrapper">
 								    <form class="search" action="search.html" method="get">
 								      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
 								      <input type="submit" value="Go" />
 								    </form>
 								    </div>
 								</div>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								<script>document.getElementById('searchbox').style.display = "block"</script>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								        </div>
 								      </div>
 								      <div class="clearer"></div>
 								    </div>
 								    <div class="related" role="navigation" aria-label="related navigation">
 								      <h3>Navigation</h3>
 								      <ul>
 								        <li class="right" style="margin-right: 10px">
 								          <a href="genindex.html" title="General Index"
 								             >index</a></li>
 								        <li class="right" >
 								          <a href="py-modindex.html" title="Python Module Index"
 								             >modules</a> |</li>
 								        <li class="right" >
 								          <a href="quapy.method.html" title="quapy.method package"
 								             >next</a> |</li>
 								        <li class="right" >
 								          <a href="quapy.classification.html" title="quapy.classification package"
 								             >previous</a> |</li>
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								        <li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> &#187;</li>
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								          <li class="nav-item nav-item-1"><a href="modules.html" >quapy</a> &#187;</li>
 								          <li class="nav-item nav-item-2"><a href="quapy.html" >quapy package</a> &#187;</li>
 								        <li class="nav-item nav-item-this"><a href="">quapy.data package</a></li>
 								      </ul>
 								    </div>
 								    <div class="footer" role="contentinfo">
 								        &#169; Copyright 2021, Alejandro Moreo.
-												adding documentation and adding one new example

											
										
										
											2023-02-08 19:06:53 +01:00
+								      Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
-												doc with sphinx

											
										
										
											2021-11-09 15:50:53 +01:00
+								    </div>
 								  </body>
 								</html>