forked from moreo/QuaPy
1208 lines
122 KiB
HTML
1208 lines
122 KiB
HTML
<!DOCTYPE html>
|
||
<html class="writer-html5" lang="en" data-content_root="./">
|
||
<head>
|
||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>quapy.data package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=92fd9be5" />
|
||
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
|
||
|
||
|
||
<!--[if lt IE 9]>
|
||
<script src="_static/js/html5shiv.min.js"></script>
|
||
<![endif]-->
|
||
|
||
<script src="_static/jquery.js?v=5d32c60e"></script>
|
||
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||
<script src="_static/documentation_options.js?v=22607128"></script>
|
||
<script src="_static/doctools.js?v=9a2dae69"></script>
|
||
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script src="_static/js/theme.js"></script>
|
||
<link rel="index" title="Index" href="genindex.html" />
|
||
<link rel="search" title="Search" href="search.html" />
|
||
<link rel="next" title="quapy.method package" href="quapy.method.html" />
|
||
<link rel="prev" title="quapy.classification package" href="quapy.classification.html" />
|
||
</head>
|
||
|
||
<body class="wy-body-for-nav">
|
||
<div class="wy-grid-for-nav">
|
||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||
<div class="wy-side-scroll">
|
||
<div class="wy-side-nav-search" >
|
||
|
||
|
||
|
||
<a href="index.html" class="icon icon-home">
|
||
QuaPy: A Python-based open-source framework for quantification
|
||
</a>
|
||
<div role="search">
|
||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||
<ul class="current">
|
||
<li class="toctree-l1 current"><a class="reference internal" href="modules.html">quapy</a><ul class="current">
|
||
<li class="toctree-l2 current"><a class="reference internal" href="quapy.html">quapy package</a><ul class="current">
|
||
<li class="toctree-l3 current"><a class="reference internal" href="quapy.html#subpackages">Subpackages</a><ul class="current">
|
||
<li class="toctree-l4"><a class="reference internal" href="quapy.classification.html">quapy.classification package</a></li>
|
||
<li class="toctree-l4 current"><a class="current reference internal" href="#">quapy.data package</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="quapy.method.html">quapy.method package</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#submodules">Submodules</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.error">quapy.error module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.evaluation">quapy.evaluation module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.functional">quapy.functional module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.model_selection">quapy.model_selection module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.plot">quapy.plot module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.protocol">quapy.protocol module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy.util">quapy.util module</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="quapy.html#module-quapy">Module contents</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||
<a href="index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||
</nav>
|
||
|
||
<div class="wy-nav-content">
|
||
<div class="rst-content">
|
||
<div role="navigation" aria-label="Page navigation">
|
||
<ul class="wy-breadcrumbs">
|
||
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||
<li class="breadcrumb-item"><a href="modules.html">quapy</a></li>
|
||
<li class="breadcrumb-item"><a href="quapy.html">quapy package</a></li>
|
||
<li class="breadcrumb-item active">quapy.data package</li>
|
||
<li class="wy-breadcrumbs-aside">
|
||
<a href="_sources/quapy.data.rst.txt" rel="nofollow"> View page source</a>
|
||
</li>
|
||
</ul>
|
||
<hr/>
|
||
</div>
|
||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||
<div itemprop="articleBody">
|
||
|
||
<section id="quapy-data-package">
|
||
<h1>quapy.data package<a class="headerlink" href="#quapy-data-package" title="Link to this heading"></a></h1>
|
||
<section id="submodules">
|
||
<h2>Submodules<a class="headerlink" href="#submodules" title="Link to this heading"></a></h2>
|
||
</section>
|
||
<section id="module-quapy.data.base">
|
||
<span id="quapy-data-base-module"></span><h2>quapy.data.base module<a class="headerlink" href="#module-quapy.data.base" title="Link to this heading"></a></h2>
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">Dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">training</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocabulary</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset" title="Link to this definition"></a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<p>Abstraction of training and test <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> objects.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>training</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
|
||
<li><p><strong>test</strong> – a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance</p></li>
|
||
<li><p><strong>vocabulary</strong> – if indicated, is a dictionary of the terms used in this textual dataset</p></li>
|
||
<li><p><strong>name</strong> – a string representing the name of the dataset</p></li>
|
||
</ul>
|
||
</dd>
|
||
</dl>
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.SplitStratified">
|
||
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">SplitStratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">collection</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">train_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset.SplitStratified"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset.SplitStratified" title="Link to this definition"></a></dt>
|
||
<dd><p>Generates a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> from a stratified split of a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance.
|
||
See <a class="reference internal" href="#quapy.data.base.LabelledCollection.split_stratified" title="quapy.data.base.LabelledCollection.split_stratified"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.split_stratified()</span></code></a></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>collection</strong> – <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p></li>
|
||
<li><p><strong>train_size</strong> – the proportion of training documents (the rest conforms the test split)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.binary">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.Dataset.binary" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns True if the training collection is labelled according to two classes</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>boolean</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.classes_">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.data.base.Dataset.classes_" title="Link to this definition"></a></dt>
|
||
<dd><p>The classes according to which the training collection is labelled</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>The classes according to which the training collection is labelled</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.kFCV">
|
||
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset.kFCV"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset.kFCV" title="Link to this definition"></a></dt>
|
||
<dd><p>Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around
|
||
<a class="reference internal" href="#quapy.data.base.LabelledCollection.kFCV" title="quapy.data.base.LabelledCollection.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.kFCV()</span></code></a> that returns <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instances made of training and test folds.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
|
||
<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
|
||
<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation as instances of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.load">
|
||
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset.load"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset.load" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a training and a test labelled set of data and convert it into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> instance.
|
||
The function in charge of reading the instances must be specified. This function can be a custom one, or any of
|
||
the reading functions defined in <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>train_path</strong> – string, the path to the file containing the training instances</p></li>
|
||
<li><p><strong>test_path</strong> – string, the path to the file containing the test instances</p></li>
|
||
<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
|
||
labels</p></li>
|
||
<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
|
||
<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances.
|
||
See <a class="reference internal" href="#quapy.data.base.LabelledCollection.load" title="quapy.data.base.LabelledCollection.load"><code class="xref py py-meth docutils literal notranslate"><span class="pre">LabelledCollection.load()</span></code></a> for further details.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.n_classes">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.Dataset.n_classes" title="Link to this definition"></a></dt>
|
||
<dd><p>The number of classes according to which the training collection is labelled</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>integer</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.reduce">
|
||
<span class="sig-name descname"><span class="pre">reduce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_train</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset.reduce"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset.reduce" title="Link to this definition"></a></dt>
|
||
<dd><p>Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>n_train</strong> – number of training documents to keep (default 100)</p></li>
|
||
<li><p><strong>n_test</strong> – number of test documents to keep (default 100)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>self</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.stats">
|
||
<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#Dataset.stats"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.Dataset.stats" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||
<span class="gp">>>> </span><span class="n">Dataset</span><span class="o">=</span><span class="n">kindle</span> <span class="c1">#tr-instances=3821, #te-instances=21591, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
|
||
</pre></div>
|
||
</div>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a dictionary containing some stats of this collection for the training and test collections. The keys
|
||
are <cite>train</cite> and <cite>test</cite>, and point to dedicated dictionaries of stats, for each collection, with keys
|
||
<cite>#instances</cite> (the number of instances), <cite>type</cite> (the type representing the instances),
|
||
<cite>#features</cite> (the number of features, if the instances are in array-like format), <cite>#classes</cite> (the classes of
|
||
the collection), <cite>prevs</cite> (the prevalence values for each class)</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.train_test">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">train_test</span></span><a class="headerlink" href="#quapy.data.base.Dataset.train_test" title="Link to this definition"></a></dt>
|
||
<dd><p>Alias to <cite>self.training</cite> and <cite>self.test</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>the training and test collections</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>the training and test collections</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.Dataset.vocabulary_size">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.data.base.Dataset.vocabulary_size" title="Link to this definition"></a></dt>
|
||
<dd><p>If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>integer</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">LabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection" title="Link to this definition"></a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<p>A LabelledCollection is a set of objects each with a label attached to each of them.
|
||
This class implements several sampling routines and other utilities.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>instances</strong> – array-like (np.ndarray, list, or csr_matrix are supported)</p></li>
|
||
<li><p><strong>labels</strong> – array-like with the same length of instances</p></li>
|
||
<li><p><strong>classes</strong> – optional, list of classes from which labels are taken. If not specified, the classes are inferred
|
||
from the labels. The classes must be indicated in cases in which some of the labels might have no examples
|
||
(i.e., a prevalence of 0)</p></li>
|
||
</ul>
|
||
</dd>
|
||
</dl>
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.X">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">X</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.X" title="Link to this definition"></a></dt>
|
||
<dd><p>An alias to self.instances</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>self.instances</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xp">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xp</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xp" title="Link to this definition"></a></dt>
|
||
<dd><p>Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from
|
||
a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> object.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>a tuple <cite>(instances, prevalence)</cite> from this collection</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xy">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xy</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xy" title="Link to this definition"></a></dt>
|
||
<dd><p>Gets the instances and labels. This is useful when working with <cite>sklearn</cite> estimators, e.g.:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">svm</span> <span class="o">=</span> <span class="n">LinearSVC</span><span class="p">()</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">my_collection</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>a tuple <cite>(instances, labels)</cite> from this collection</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.binary">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">binary</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.binary" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns True if the number of classes is 2</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>boolean</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.counts">
|
||
<span class="sig-name descname"><span class="pre">counts</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.counts"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.counts" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns the number of instances for each of the classes in the codeframe.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the number of instances of each class, in the same order
|
||
as listed by <cite>self.classes_</cite></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.join">
|
||
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.join"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.join" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns a new <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> as the union of the collections given in input.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>args</strong> – instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> representing the union of both collections</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.kFCV">
|
||
<span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.kFCV"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.kFCV" title="Link to this definition"></a></dt>
|
||
<dd><p>Generator of stratified folds to be used in k-fold cross validation.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>nfolds</strong> – integer (default 5), the number of folds to generate</p></li>
|
||
<li><p><strong>nrepeats</strong> – integer (default 1), the number of rounds of k-fold cross validation to run</p></li>
|
||
<li><p><strong>random_state</strong> – integer (default 0), guarantees that the folds generated are reproducible</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>yields <cite>nfolds * nrepeats</cite> folds for k-fold cross validation</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.load">
|
||
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loader_func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">loader_kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.load"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.load" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a labelled set of data and convert it into a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> instance. The function in charge
|
||
of reading the instances must be specified. This function can be a custom one, or any of the reading functions
|
||
defined in <a class="reference internal" href="#module-quapy.data.reader" title="quapy.data.reader"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.data.reader</span></code></a> module.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>path</strong> – string, the path to the file containing the labelled instances</p></li>
|
||
<li><p><strong>loader_func</strong> – a custom function that implements the data loader and returns a tuple with instances and
|
||
labels</p></li>
|
||
<li><p><strong>classes</strong> – array-like, the classes according to which the instances are labelled</p></li>
|
||
<li><p><strong>loader_kwargs</strong> – any argument that the <cite>loader_func</cite> function needs in order to read the instances, i.e.,
|
||
these arguments are used to call <cite>loader_func(path, **loader_kwargs)</cite></p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> object</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.n_classes">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.n_classes" title="Link to this definition"></a></dt>
|
||
<dd><p>The number of classes</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>integer</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.p">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">p</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.p" title="Link to this definition"></a></dt>
|
||
<dd><p>An alias to self.prevalence()</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>self.prevalence()</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.prevalence">
|
||
<span class="sig-name descname"><span class="pre">prevalence</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.prevalence"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.prevalence" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns the prevalence, or relative frequency, of the classes in the codeframe.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>a np.ndarray of shape <cite>(n_classes)</cite> with the relative frequencies of each class, in the same order
|
||
as listed by <cite>self.classes_</cite></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling">
|
||
<span class="sig-name descname"><span class="pre">sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.sampling"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling" title="Link to this definition"></a></dt>
|
||
<dd><p>Return a random sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size and desired prevalence
|
||
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
|
||
the actual prevalence of the class, or with replacement otherwise.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>size</strong> – integer, the requested size</p></li>
|
||
<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be lead empty since
|
||
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
|
||
<cite>self.classes_</cite> can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
|
||
<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
|
||
<li><p><strong>random_state</strong> – seed for reproducing sampling</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite> and prevalence close to <cite>prevs</cite> (or
|
||
prevalence == <cite>prevs</cite> if the exact prevalence values can be met as proportions of instances)</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_from_index">
|
||
<span class="sig-name descname"><span class="pre">sampling_from_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">index</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.sampling_from_index"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_from_index" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> whose elements are sampled from this collection using the
|
||
index.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>index</strong> – np.ndarray</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_index">
|
||
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.sampling_index"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
||
prevalence values are not specified, then returns the index of a uniform sampling.
|
||
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
|
||
the actual prevalence of the class, or without replacement otherwise.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>size</strong> – integer, the requested size</p></li>
|
||
<li><p><strong>prevs</strong> – the prevalence for each class; the prevalence value for the last class can be lead empty since
|
||
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
|
||
<cite>self.classes_</cite> can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
|
||
<li><p><strong>shuffle</strong> – if set to True (default), shuffles the index before returning it</p></li>
|
||
<li><p><strong>random_state</strong> – seed for reproducing sampling</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_random">
|
||
<span class="sig-name descname"><span class="pre">split_random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.split_random"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_random" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split randomly from this collection, at desired
|
||
proportion.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
|
||
as the training collection). The rest of elements are included in the right-most returned collection
|
||
(typically used as a test collection).</p></li>
|
||
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
|
||
second one with <cite>1-train_prop</cite> elements</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.split_stratified">
|
||
<span class="sig-name descname"><span class="pre">split_stratified</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">train_prop</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.6</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.split_stratified"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.split_stratified" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> split with stratification from this collection, at desired
|
||
proportion.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>train_prop</strong> – the proportion of elements to include in the left-most returned collection (typically used
|
||
as the training collection). The rest of elements are included in the right-most returned collection
|
||
(typically used as a test collection).</p></li>
|
||
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>two instances of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>, the first one with <cite>train_prop</cite> elements, and the
|
||
second one with <cite>1-train_prop</cite> elements</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.stats">
|
||
<span class="sig-name descname"><span class="pre">stats</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">show</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.stats"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.stats" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns (and eventually prints) a dictionary with some stats of this collection. E.g.,:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||
<span class="gp">>>> </span><span class="c1">#instances=3821, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
|
||
</pre></div>
|
||
</div>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>show</strong> – if set to True (default), prints the stats in standard output</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a dictionary containing some stats of this collection. Keys include <cite>#instances</cite> (the number of
|
||
instances), <cite>type</cite> (the type representing the instances), <cite>#features</cite> (the number of features, if the
|
||
instances are in array-like format), <cite>#classes</cite> (the classes of the collection), <cite>prevs</cite> (the prevalence
|
||
values for each class)</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling">
|
||
<span class="sig-name descname"><span class="pre">uniform_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.uniform_sampling"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns a uniform sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size. The sampling is drawn
|
||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||
otherwise.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>size</strong> – integer, the requested size</p></li>
|
||
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> with length == <cite>size</cite></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.uniform_sampling_index">
|
||
<span class="sig-name descname"><span class="pre">uniform_sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/base.html#LabelledCollection.uniform_sampling_index"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.base.LabelledCollection.uniform_sampling_index" title="Link to this definition"></a></dt>
|
||
<dd><p>Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
||
with replacement if the requested size is greater than the number of instances, or without replacement
|
||
otherwise.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>size</strong> – integer, the size of the uniform sample</p></li>
|
||
<li><p><strong>random_state</strong> – if specified, guarantees reproducibility of the split.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a np.ndarray of shape <cite>(size)</cite> with the indexes</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py property">
|
||
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.y">
|
||
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">y</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.y" title="Link to this definition"></a></dt>
|
||
<dd><p>An alias to self.labels</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>self.labels</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
</section>
|
||
<section id="module-quapy.data.datasets">
|
||
<span id="quapy-data-datasets-module"></span><h2>quapy.data.datasets module<a class="headerlink" href="#module-quapy.data.datasets" title="Link to this heading"></a></h2>
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_IFCB">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_IFCB</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">single_sample_train</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">for_model_selection</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_IFCB"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_IFCB" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads the IFCB dataset for quantification from <a class="reference external" href="https://zenodo.org/records/10036244">Zenodo</a> (for more
|
||
information on this dataset, please follow the zenodo link).
|
||
This dataset is based on the data available publicly at
|
||
<a class="reference external" href="https://github.com/hsosik/WHOI-Plankton">WHOI-Plankton repo</a>.
|
||
The scripts for the processing are available at <a class="reference external" href="https://github.com/pglez82/IFCB_Zenodo">P. González’s repo</a>.
|
||
Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.</p>
|
||
<p>The datasets are downloaded only once, and stored for fast reuse.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>single_sample_train</strong> – a boolean. If true, it will return the train dataset as a
|
||
<a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> (all examples together).
|
||
If false, a generator of training samples will be returned. Each example in the training set has an individual label.</p></li>
|
||
<li><p><strong>for_model_selection</strong> – if True, then returns a split 30% of the training set (86 out of 286 samples) to be used for model selection;
|
||
if False, then returns the full training set as training set and the test set as the test set</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a tuple <cite>(train, test_gen)</cite> where <cite>train</cite> is an instance of
|
||
<a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, if <cite>single_sample_train</cite> is true or
|
||
<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data._ifcb.IFCBTrainSamplesFromDir</span></code>, i.e. a sampling protocol that returns a series of samples
|
||
labelled example by example. test_gen will be a <code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data._ifcb.IFCBTestSamples</span></code>,
|
||
i.e., a sampling protocol that returns a series of samples labelled by prevalence.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIBinaryDataset">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIBinaryDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_UCIBinaryDataset"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_UCIBinaryDataset" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a UCI dataset as an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>, as used in
|
||
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
|
||
Information Fusion, 34, 87-100.</a>
|
||
and
|
||
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
|
||
Dynamic ensemble selection for quantification tasks.
|
||
Information Fusion, 45, 1-15.</a>.
|
||
The datasets do not come with a predefined train-test split (see <code class="xref py py-meth docutils literal notranslate"><span class="pre">fetch_UCILabelledCollection()</span></code> for further
|
||
information on how to use these collections), and so a train-test split is generated at desired proportion.
|
||
The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest conforms the training set</p></li>
|
||
<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIBinaryLabelledCollection">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIBinaryLabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_UCIBinaryLabelledCollection"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_UCIBinaryLabelledCollection" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a UCI collection as an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, as used in
|
||
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628">Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
|
||
Information Fusion, 34, 87-100.</a>
|
||
and
|
||
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652">Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
|
||
Dynamic ensemble selection for quantification tasks.
|
||
Information Fusion, 45, 1-15.</a>.
|
||
The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation
|
||
protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.
|
||
This can be reproduced by using <a class="reference internal" href="#quapy.data.base.Dataset.kFCV" title="quapy.data.base.Dataset.kFCV"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.data.base.Dataset.kFCV()</span></code></a>, e.g.:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||
<span class="gp">>>> </span><span class="n">collection</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="s2">"yeast"</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">train</span><span class="o">.</span><span class="n">Dataset</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">collection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">2</span><span class="p">):</span>
|
||
<span class="gp">>>> </span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_DATASETS</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest conforms the training set</p></li>
|
||
<li><p><strong>verbose</strong> – set to True (default is False) to get information (from the UCI ML repository) about the datasets</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIMulticlassDataset">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIMulticlassDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_UCIMulticlassDataset"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_UCIMulticlassDataset" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a UCI multiclass dataset as an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>.</p>
|
||
<p>The list of available datasets is taken from <a class="reference external" href="https://archive.ics.uci.edu/">https://archive.ics.uci.edu/</a>, following these criteria:
|
||
- It has more than 1000 instances
|
||
- It is suited for classification
|
||
- It has more than two classes
|
||
- It is available for Python import (requires ucimlrepo package)</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||
<span class="gp">>>> </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="s2">"dry-bean"</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">train_test</span>
|
||
<span class="gp">>>> </span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_MULTICLASS_DATASETS</cite></p>
|
||
<p>The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest conforms the training set</p></li>
|
||
<li><p><strong>verbose</strong> – set to True (default is False) to get information (stats) about the dataset</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_UCIMulticlassLabelledCollection">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_UCIMulticlassLabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_UCIMulticlassLabelledCollection"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_UCIMulticlassLabelledCollection" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a UCI multiclass collection as an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>.</p>
|
||
<p>The list of available datasets is taken from <a class="reference external" href="https://archive.ics.uci.edu/">https://archive.ics.uci.edu/</a>, following these criteria:
|
||
- It has more than 1000 instances
|
||
- It is suited for classification
|
||
- It has more than two classes
|
||
- It is available for Python import (requires ucimlrepo package)</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||
<span class="gp">>>> </span><span class="n">collection</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="s2">"dry-bean"</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">collection</span><span class="o">.</span><span class="n">Xy</span>
|
||
<span class="gp">>>> </span> <span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The list of valid dataset names can be accessed in <cite>quapy.data.datasets.UCI_MULTICLASS_DATASETS</cite></p>
|
||
<p>The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – a dataset name</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where the dataset will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>test_split</strong> – proportion of documents to be included in the test set. The rest conforms the training set</p></li>
|
||
<li><p><strong>verbose</strong> – set to True (default is False) to get information (stats) about the dataset</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_lequa2022">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_lequa2022</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_lequa2022"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_lequa2022" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads the official datasets provided for the <a class="reference external" href="https://lequa2022.github.io/index">LeQua</a> competition.
|
||
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
|
||
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
|
||
Tasks T1A and T2A are binary sentiment quantification problems, while T2A and T2B are multiclass quantification
|
||
problems consisting of estimating the class prevalence values of 28 different merchandise products.
|
||
We refer to the <a class="reference external" href="https://ceur-ws.org/Vol-3180/paper-146.pdf">Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022).
|
||
A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.</a> for a detailed description
|
||
on the tasks and datasets.</p>
|
||
<p>The datasets are downloaded only once, and stored for fast reuse.</p>
|
||
<p>See <cite>lequa2022_experiments.py</cite> provided in the example folder, that can serve as a guide on how to use these
|
||
datasets.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>task</strong> – a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a tuple <cite>(train, val_gen, test_gen)</cite> where <cite>train</cite> is an instance of
|
||
<a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a>, <cite>val_gen</cite> and <cite>test_gen</cite> are instances of
|
||
<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data._lequa2022.SamplesFromDir</span></code>, a subclass of <a class="reference internal" href="quapy.html#quapy.protocol.AbstractProtocol" title="quapy.protocol.AbstractProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.protocol.AbstractProtocol</span></code></a>,
|
||
that return a series of samples stored in a directory which are labelled by prevalence.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_reviews">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_reviews</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tfidf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_reviews"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_reviews" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a Reviews dataset as a Dataset instance, as used in
|
||
<a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/3269206.3269287">Esuli, A., Moreo, A., and Sebastiani, F. “A recurrent neural network for sentiment quantification.”
|
||
Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.</a>.
|
||
The list of valid dataset names can be accessed in <cite>quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘hp’, ‘kindle’, ‘imdb’</p></li>
|
||
<li><p><strong>tfidf</strong> – set to True to transform the raw documents into tfidf weighted matrices</p></li>
|
||
<li><p><strong>min_df</strong> – minimun number of documents that should contain a term in order for the term to be
|
||
kept (ignored if tfidf==False)</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
|
||
faster subsequent invokations</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.fetch_twitter">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">fetch_twitter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">for_model_selection</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_home</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pickle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></span><a class="reference internal" href="_modules/quapy/data/datasets.html#fetch_twitter"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.fetch_twitter" title="Link to this definition"></a></dt>
|
||
<dd><p>Loads a Twitter dataset as a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance, as used in:
|
||
<a class="reference external" href="https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf">Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
|
||
Social Network Analysis and Mining6(19), 1–22 (2016)</a>
|
||
Note that the datasets ‘semeval13’, ‘semeval14’, ‘semeval15’ share the same training set.
|
||
The list of valid dataset names corresponding to training sets can be accessed in
|
||
<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN</cite>, while the test sets can be accessed in
|
||
<cite>quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset_name</strong> – the name of the dataset: valid ones are ‘gasp’, ‘hcr’, ‘omd’, ‘sanders’, ‘semeval13’,
|
||
‘semeval14’, ‘semeval15’, ‘semeval16’, ‘sst’, ‘wa’, ‘wb’</p></li>
|
||
<li><p><strong>for_model_selection</strong> – if True, then returns the train split as the training set and the devel split
|
||
as the test set; if False, then returns the train+devel split as the training set and the test set as the
|
||
test set</p></li>
|
||
<li><p><strong>min_df</strong> – minimun number of documents that should contain a term in order for the term to be kept</p></li>
|
||
<li><p><strong>data_home</strong> – specify the quapy home directory where collections will be dumped (leave empty to use the default
|
||
~/quay_data/ directory)</p></li>
|
||
<li><p><strong>pickle</strong> – set to True to pickle the Dataset object the first time it is generated, in order to allow for
|
||
faster subsequent invokations</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> instance</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.datasets.warn">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.datasets.</span></span><span class="sig-name descname"><span class="pre">warn</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/datasets.html#warn"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.datasets.warn" title="Link to this definition"></a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</section>
|
||
<section id="module-quapy.data.preprocessing">
|
||
<span id="quapy-data-preprocessing-module"></span><h2>quapy.data.preprocessing module<a class="headerlink" href="#module-quapy.data.preprocessing" title="Link to this heading"></a></h2>
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">IndexTransformer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer" title="Link to this definition"></a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<p>This class implements a sklearn’s-style transformer that indexes text as numerical ids for the tokens it
|
||
contains, and that would be generated by sklearn’s
|
||
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>kwargs</strong> – <p>keyworded arguments from
|
||
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html">CountVectorizer</a></p>
|
||
</p>
|
||
</dd>
|
||
</dl>
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.add_word">
|
||
<span class="sig-name descname"><span class="pre">add_word</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">word</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">id</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nogaps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer.add_word"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.add_word" title="Link to this definition"></a></dt>
|
||
<dd><p>Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.
|
||
Useful to define special tokens for codifying unknown words, or padding tokens.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>word</strong> – string, surface form of the token</p></li>
|
||
<li><p><strong>id</strong> – integer, numerical value to assign to the token (leave as None for indicating the next valid id,
|
||
default)</p></li>
|
||
<li><p><strong>nogaps</strong> – if set to True (default) asserts that the id indicated leads to no numerical gaps with
|
||
precedent ids stored so far</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>integer, the numerical id for the new token</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit">
|
||
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer.fit"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit" title="Link to this definition"></a></dt>
|
||
<dd><p>Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>X</strong> – a list of strings</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>self</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.fit_transform">
|
||
<span class="sig-name descname"><span class="pre">fit_transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer.fit_transform"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.fit_transform" title="Link to this definition"></a></dt>
|
||
<dd><p>Fits the transform on <cite>X</cite> and transforms it.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>X</strong> – a list of strings</p></li>
|
||
<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.transform">
|
||
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer.transform"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.transform" title="Link to this definition"></a></dt>
|
||
<dd><p>Transforms the strings in <cite>X</cite> as lists of numerical ids</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>X</strong> – a list of strings</p></li>
|
||
<li><p><strong>n_jobs</strong> – the number of parallel workers to carry out this task</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <cite>np.ndarray</cite> of numerical ids</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.IndexTransformer.vocabulary_size">
|
||
<span class="sig-name descname"><span class="pre">vocabulary_size</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#IndexTransformer.vocabulary_size"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.IndexTransformer.vocabulary_size" title="Link to this definition"></a></dt>
|
||
<dd><p>Gets the length of the vocabulary according to which the document tokens have been indexed</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>integer</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.index">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#index"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.index" title="Link to this definition"></a></dt>
|
||
<dd><p>Indexes the tokens of a textual <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of string documents.
|
||
To index a document means to replace each different token by a unique numerical index.
|
||
Rare words (i.e., words occurring less than <cite>min_df</cite> times) are replaced by a special token <cite>UNK</cite></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object where the instances of training and test documents
|
||
are lists of str</p></li>
|
||
<li><p><strong>min_df</strong> – minimum number of occurrences below which the term is replaced by a <cite>UNK</cite> index</p></li>
|
||
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
|
||
<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
|
||
<cite>CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>_</cite>)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
|
||
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) consisting of lists of integer values representing indices.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.reduce_columns">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">reduce_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#reduce_columns"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.reduce_columns" title="Link to this definition"></a></dt>
|
||
<dd><p>Reduces the dimensionality of the instances, represented as a <cite>csr_matrix</cite> (or any subtype of
|
||
<cite>scipy.sparse.spmatrix</cite>), of training and test documents by removing the columns of words which are not present
|
||
in at least <cite>min_df</cite> instances in the training set</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in which instances are represented in sparse format (any
|
||
subtype of scipy.sparse.spmatrix)</p></li>
|
||
<li><p><strong>min_df</strong> – integer, minimum number of instances below which the columns are removed</p></li>
|
||
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (if inplace=False) or a reference to the current
|
||
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> (inplace=True) where the dimensions corresponding to infrequent terms
|
||
in the training set have been removed</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.standardize">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">standardize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#standardize"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.standardize" title="Link to this definition"></a></dt>
|
||
<dd><p>Standardizes the real-valued columns of a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a>.
|
||
Standardization, aka z-scoring, of a variable <cite>X</cite> comes down to subtracting the average and normalizing by the
|
||
standard deviation.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> object</p></li>
|
||
<li><p><strong>inplace</strong> – set to True if the transformation is to be applied inplace, or to False (default) if a new
|
||
<a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> is to be returned</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>an instance of <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.preprocessing.text2tfidf">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.preprocessing.</span></span><span class="sig-name descname"><span class="pre">text2tfidf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><span class="pre">Dataset</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_df</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sublinear_tf</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inplace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/preprocessing.html#text2tfidf"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.preprocessing.text2tfidf" title="Link to this definition"></a></dt>
|
||
<dd><p>Transforms a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of textual instances into a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> of
|
||
tfidf weighted sparse vectors</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>dataset</strong> – a <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> where the instances of training and test collections are
|
||
lists of str</p></li>
|
||
<li><p><strong>min_df</strong> – minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</p></li>
|
||
<li><p><strong>sublinear_tf</strong> – whether or not to apply the log scalling to the tf counters (default True)</p></li>
|
||
<li><p><strong>inplace</strong> – whether or not to apply the transformation inplace (True), or to a new copy (False, default)</p></li>
|
||
<li><p><strong>kwargs</strong> – the rest of parameters of the transformation (as for sklearn’s
|
||
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html">TfidfVectorizer</a>)</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a new <a class="reference internal" href="#quapy.data.base.Dataset" title="quapy.data.base.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.Dataset</span></code></a> in <cite>csr_matrix</cite> format (if inplace=False) or a reference to the
|
||
current Dataset (if inplace=True) where the instances are stored in a <cite>csr_matrix</cite> of real-valued tfidf scores</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</section>
|
||
<section id="module-quapy.data.reader">
|
||
<span id="quapy-data-reader-module"></span><h2>quapy.data.reader module<a class="headerlink" href="#module-quapy.data.reader" title="Link to this heading"></a></h2>
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.reader.binarize">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">binarize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pos_class</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/reader.html#binarize"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.reader.binarize" title="Link to this definition"></a></dt>
|
||
<dd><p>Binarizes a categorical array-like collection of labels towards the positive class <cite>pos_class</cite>. E.g.,:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">binarize</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span>
|
||
</pre></div>
|
||
</div>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>y</strong> – array-like of labels</p></li>
|
||
<li><p><strong>pos_class</strong> – integer, the positive class</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a binary np.ndarray, in which values 1 corresponds to positions in whcih <cite>y</cite> had <cite>pos_class</cite> labels, and
|
||
0 otherwise</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.reader.from_csv">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/reader.html#from_csv"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.reader.from_csv" title="Link to this definition"></a></dt>
|
||
<dd><p>Reads a csv file in which columns are separated by ‘,’.
|
||
File format <label>,<feat1>,<feat2>,…,<featn></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>path</strong> – path to the csv file</p></li>
|
||
<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a np.ndarray for the labels and a ndarray (float) for the covariates</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.reader.from_sparse">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_sparse</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/reader.html#from_sparse"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.reader.from_sparse" title="Link to this definition"></a></dt>
|
||
<dd><p>Reads a labelled collection of real-valued instances expressed in sparse format
|
||
File format <-1 or 0 or 1>[s col(int):val(float)]</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>path</strong> – path to the labelled collection</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a <cite>csr_matrix</cite> containing the instances (rows), and a ndarray containing the labels</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.reader.from_text">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">from_text</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">class2int</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/reader.html#from_text"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.reader.from_text" title="Link to this definition"></a></dt>
|
||
<dd><p>Reads a labelled colletion of documents.
|
||
File fomart <0 or 1> <document></p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>path</strong> – path to the labelled collection</p></li>
|
||
<li><p><strong>encoding</strong> – the text encoding used to open the file</p></li>
|
||
<li><p><strong>verbose</strong> – if >0 (default) shows some progress information in standard output</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a list of sentences, and a list of labels</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="quapy.data.reader.reindex_labels">
|
||
<span class="sig-prename descclassname"><span class="pre">quapy.data.reader.</span></span><span class="sig-name descname"><span class="pre">reindex_labels</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/quapy/data/reader.html#reindex_labels"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#quapy.data.reader.reindex_labels" title="Link to this definition"></a></dt>
|
||
<dd><p>Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.
|
||
E.g.:</p>
|
||
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">reindex_labels</span><span class="p">([</span><span class="s1">'B'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="s1">'A'</span><span class="p">,</span> <span class="s1">'C'</span><span class="p">])</span>
|
||
<span class="gp">>>> </span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]),</span> <span class="n">array</span><span class="p">([</span><span class="s1">'A'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="s1">'C'</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">'<U1'</span><span class="p">))</span>
|
||
</pre></div>
|
||
</div>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>y</strong> – the list or array of original labels</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</section>
|
||
<section id="module-quapy.data">
|
||
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy.data" title="Link to this heading"></a></h2>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||
<a href="quapy.classification.html" class="btn btn-neutral float-left" title="quapy.classification package" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||
<a href="quapy.method.html" class="btn btn-neutral float-right" title="quapy.method package" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||
</div>
|
||
|
||
<hr/>
|
||
|
||
<div role="contentinfo">
|
||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||
</div>
|
||
|
||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||
|
||
|
||
</footer>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<script>
|
||
jQuery(function () {
|
||
SphinxRtdTheme.Navigation.enable(true);
|
||
});
|
||
</script>
|
||
|
||
</body>
|
||
</html> |