Compare commits

..

21 Commits

Author SHA1 Message Date
Alejandro Moreo Fernandez 996bfd82f4 generating plots for overview paper 2024-11-18 14:45:30 +01:00
Alejandro Moreo Fernandez 5bbaf42a0e improving plots for overview paper 2024-11-13 18:45:26 +01:00
Alejandro Moreo Fernandez 6f7a1e511e adding scripts for plots (only local) 2024-11-13 17:16:18 +01:00
Alejandro Moreo Fernandez 9d5ff154a0 deleting scripts folder so it can be downloaded 2024-05-30 11:42:48 +02:00
Alejandro Moreo Fernandez 7febaa2693 documenting the regressor 2024-05-29 11:23:17 +02:00
Alejandro Moreo Fernandez 3264e66cc9 adding regressor for T3 2024-05-29 11:12:43 +02:00
Alejandro Moreo Fernandez a124e791ae Merge branch 'lequa2024' of github.com:HLT-ISTI/QuaPy into lequa2024 2024-05-28 10:24:55 +02:00
Alejandro Moreo Fernandez 12a44586a8 optimizing T3 for normalized match distance 2024-05-28 10:24:13 +02:00
Alejandro Moreo Fernandez ea1e2d2813 more baselines, kde, and dm 2024-05-20 10:15:05 +02:00
Alejandro Moreo Fernandez 6cb30edb7b
Update run_baselines.sh 2024-05-16 21:05:06 +02:00
Alejandro Moreo Fernandez 6b754dd845 redundant code cleaned from run_baselines.sh 2024-05-15 12:01:31 +02:00
Alejandro Moreo Fernandez 81fbb54992 kde 2024-05-10 10:15:44 +02:00
Alejandro Moreo Fernandez 8dfb109b41 cleaning baselines 2024-05-02 16:53:44 +02:00
Alejandro Moreo Fernandez 07a86746c3 Merge branch 'lequa2024' of github.com:HLT-ISTI/QuaPy into lequa2024 2024-05-02 15:03:27 +02:00
Alejandro Moreo Fernandez 0c434384db running baselines 2024-05-02 15:03:00 +02:00
Alejandro Moreo Fernandez 96576687ff preparing baseline format for codalab 2024-05-02 09:49:42 +02:00
Alejandro Moreo Fernandez fd1ab667ac preparing baselines 2024-05-02 09:28:50 +02:00
Alejandro Moreo Fernandez a29d67500a automatically download the official scripts 2024-02-16 17:04:10 +01:00
Alejandro Moreo Fernandez 6c5bd674ea cleaning 2024-02-14 18:54:07 +01:00
Alejandro Moreo Fernandez 451c938171 restoring some commented code 2024-02-14 18:52:11 +01:00
Alejandro Moreo Fernandez 323749178b adding some preliminary baselines... 2024-02-14 18:50:53 +01:00
95 changed files with 6852 additions and 18684 deletions

3
.gitignore vendored
View File

@ -70,8 +70,7 @@ instance/
.scrapy
# Sphinx documentation
docs/_build/doctest
docs/_build/doctrees
docs/_build/
# PyBuilder
target/

View File

@ -1,4 +1,4 @@
Change Log 0.1.8
Change Log 0.1.8g
----------------
- Added Kernel Density Estimation methods (KDEyML, KDEyCS, KDEyHD) as proposed in the paper:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

88
LeQua2024/_lequa2024.py Normal file
View File

@ -0,0 +1,88 @@
import zipfile
import pandas as pd
import os
from os.path import join
import quapy as qp
from scripts.data import load_vector_documents
from quapy.data import LabelledCollection
from quapy.protocol import AbstractProtocol
from quapy.util import download_file_if_not_exists
LEQUA2024_TASKS = ['T1', 'T2', 'T3', 'T4']
LEQUA2024_ZENODO = 'https://zenodo.org/record/11091067' # v2, no ground truth for test yet
class LabelledCollectionsFromDir(AbstractProtocol):
def __init__(self, path_dir:str, ground_truth_path:str, load_fn):
self.path_dir = path_dir
self.load_fn = load_fn
self.true_prevs = pd.read_csv(ground_truth_path, index_col=0)
def __call__(self):
for id, prevalence in self.true_prevs.iterrows():
collection_path = os.path.join(self.path_dir, f'{id}.txt')
lc = LabelledCollection.load(path=collection_path, loader_func=self.load_fn)
yield lc
def fetch_lequa2024(task, data_home=None, merge_T3=False):
from quapy.data._lequa2022 import SamplesFromDir
assert task in LEQUA2024_TASKS, \
f'Unknown task {task}. Valid ones are {LEQUA2024_TASKS}'
if data_home is None:
data_home = qp.util.get_quapy_home()
lequa_dir = data_home
URL_TRAINDEV=f'{LEQUA2024_ZENODO}/files/{task}.train_dev.zip'
URL_TEST=f'{LEQUA2024_ZENODO}/files/{task}.test.zip'
# URL_TEST_PREV=f'{LEQUA2024_ZENODO}/files/{task}.test_prevalences.zip'
lequa_dir = join(data_home, 'lequa2024')
os.makedirs(lequa_dir, exist_ok=True)
def download_unzip_and_remove(unzipped_path, url):
tmp_path = join(lequa_dir, task + '_tmp.zip')
download_file_if_not_exists(url, tmp_path)
with zipfile.ZipFile(tmp_path) as file:
file.extractall(unzipped_path)
os.remove(tmp_path)
if not os.path.exists(join(lequa_dir, task)):
download_unzip_and_remove(lequa_dir, URL_TRAINDEV)
download_unzip_and_remove(lequa_dir, URL_TEST)
# download_unzip_and_remove(lequa_dir, URL_TEST_PREV)
load_fn = load_vector_documents
val_samples_path = join(lequa_dir, task, 'public', 'dev_samples')
val_true_prev_path = join(lequa_dir, task, 'public', 'dev_prevalences.txt')
val_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn)
# test_samples_path = join(lequa_dir, task, 'public', 'test_samples')
# test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt')
# test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn)
test_gen = None
if task != 'T3':
tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
train = LabelledCollection.load(tr_path, loader_func=load_fn)
return train, val_gen, test_gen
else:
training_samples_path = join(lequa_dir, task, 'public', 'training_samples')
training_true_prev_path = join(lequa_dir, task, 'public', 'training_prevalences.txt')
train_gen = LabelledCollectionsFromDir(training_samples_path, training_true_prev_path, load_fn=load_fn)
if merge_T3:
train = LabelledCollection.join(*list(train_gen()))
return train, val_gen, test_gen
else:
return train_gen, val_gen, test_gen

115
LeQua2024/baselines.py Normal file
View File

@ -0,0 +1,115 @@
import argparse
import pickle
import os
import sys
from os.path import join
import numpy as np
from sklearn.linear_model import LogisticRegression as LR
from scripts.constants import SAMPLE_SIZE
from scripts.evaluate import normalized_match_distance
from LeQua2024._lequa2024 import LEQUA2024_TASKS, fetch_lequa2024, LEQUA2024_ZENODO
from quapy.method.aggregative import *
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
import quapy.functional as F
# LeQua official baselines (under development!)
# =================================================================================
BINARY_TASKS = ['T1', 'T4']
def new_cls():
return LR(n_jobs=-1, max_iter=3000)
lr_params = {
'C': np.logspace(-4, 4, 9),
'class_weight': [None, 'balanced']
}
def wrap_params(cls_params:dict, prefix:str):
return {'__'.join([prefix, key]): val for key, val in cls_params.items()}
def baselines():
q_params = wrap_params(lr_params, 'classifier')
kde_params = {**q_params, 'bandwidth': np.linspace(0.01, 0.20, 20)}
dm_params = {**q_params, 'nbins': [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 64]}
yield CC(new_cls()), "CC", q_params
yield ACC(new_cls()), "ACC", q_params
yield PCC(new_cls()), "PCC", q_params
yield PACC(new_cls()), "PACC", q_params
yield SLD(new_cls()), "SLD", q_params
#yield KDEyML(new_cls()), "KDEy-ML", kde_params
#yield KDEyHD(new_cls()), "KDEy-HD", kde_params
# yield KDEyCS(new_cls()), "KDEy-CS", kde_params
#yield DMy(new_cls()), "DMy", dm_params
def main(args):
models_path = qp.util.create_if_not_exist(join('./models', args.task))
hyperparams_path = qp.util.create_if_not_exist(join('./hyperparams', args.task))
os.makedirs(models_path, exist_ok=True)
os.makedirs(hyperparams_path, exist_ok=True)
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE[args.task]
train, gen_val, gen_test = fetch_lequa2024(task=args.task, data_home=args.datadir, merge_T3=True)
# gen_test is None, since the true prevalence vectors for the test samples will be released
# only after the competition ends
print(f'number of classes: {len(train.classes_)}')
print(f'number of training documents: {len(train)}')
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')
for quantifier, q_name, param_grid in baselines():
model_path = os.path.join(models_path, q_name + '.pkl')
modelparams_path = os.path.join(hyperparams_path, q_name + '.pkl')
if os.path.exists(model_path):
print(f'a pickle for {q_name} exists already in {model_path}; skipping!')
continue
print(f'starting model fitting for {q_name}')
if param_grid is not None:
optimizer = qp.model_selection.GridSearchQ(
quantifier,
param_grid,
protocol=gen_val,
error=normalized_match_distance if args.task=='T3' else qp.error.mrae,
refit=False,
verbose=True,
n_jobs=-1
).fit(train)
print(f'{q_name} got MRAE={optimizer.best_score_:.5f} (hyper-params: {optimizer.best_params_})')
quantifier = optimizer.best_model()
else:
quantifier.fit(train)
print(f'saving model in {model_path}')
pickle.dump(quantifier, open(model_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
pickle.dump(quantifier.get_params(), open(modelparams_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='LeQua2024 baselines')
parser.add_argument('task', metavar='TASK', type=str, choices=LEQUA2024_TASKS,
help=f'Code of the task; available ones are {LEQUA2024_TASKS}')
parser.add_argument('datadir', metavar='DATA-PATH', type=str,
help='Path of the directory containing LeQua 2024 data (default is ./data)',
default='./data')
args = parser.parse_args()
main(args)

60
LeQua2024/predict.py Normal file
View File

@ -0,0 +1,60 @@
import argparse
import quapy as qp
from scripts.data import ResultSubmission
import os
import pickle
from tqdm import tqdm
from scripts.data import gen_load_samples
from glob import glob
from scripts import constants
from regressor import KDEyRegressor, RegressionToSimplex
"""
LeQua2024 prediction script
"""
def main(args):
if not args.force and os.path.exists(args.output):
print(f'prediction file {args.output} already exists! set --force to override')
return
# check the number of samples
nsamples = len(glob(os.path.join(args.samples, f'*.txt')))
if nsamples not in {constants.DEV_SAMPLES, constants.TEST_SAMPLES}:
print(f'Warning: The number of samples (.txt) in {args.samples} '
f'does neither coincide with the expected number of '
f'dev samples ({constants.DEV_SAMPLES}) nor with the expected number of '
f'test samples ({constants.TEST_SAMPLES}).')
# load pickled model
model = pickle.load(open(args.model, 'rb'))
# predictions
predictions = ResultSubmission()
for sampleid, sample in tqdm(gen_load_samples(args.samples, return_id=True), desc='predicting', total=nsamples):
predictions.add(sampleid, model.quantify(sample))
# saving
qp.util.create_parent_dir(args.output)
predictions.dump(args.output)
if __name__=='__main__':
parser = argparse.ArgumentParser(description='LeQua2024 prediction script')
parser.add_argument('model', metavar='MODEL-PATH', type=str,
help='Path of saved model')
parser.add_argument('samples', metavar='SAMPLES-PATH', type=str,
help='Path to the directory containing the samples')
parser.add_argument('output', metavar='PREDICTIONS-PATH', type=str,
help='Path where to store the predictions file')
parser.add_argument('--force', action='store_true',
help='Overrides prediction file if exists')
args = parser.parse_args()
if not os.path.exists(args.samples):
raise FileNotFoundError(f'path {args.samples} does not exist')
if not os.path.isdir(args.samples):
raise ValueError(f'path {args.samples} is not a valid directory')
main(args)

133
LeQua2024/regressor.py Normal file
View File

@ -0,0 +1,133 @@
import pickle
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from LeQua2024._lequa2024 import fetch_lequa2024
from quapy.data import LabelledCollection
from quapy.protocol import AbstractProtocol
from quapy.method.base import BaseQuantifier
import quapy.functional as F
from tqdm import tqdm
from scripts.evaluate import normalized_match_distance, match_distance
def projection_simplex_sort(unnormalized_arr) -> np.ndarray:
"""Projects a point onto the probability simplex.
[This code is taken from the devel branch, that will correspond to the future QuaPy 0.1.9]
The code is adapted from Mathieu Blondel's BSD-licensed
`implementation <https://gist.github.com/mblondel/6f3b7aaad90606b98f71>`_
(see function `projection_simplex_sort` in their repo) which is accompanying the paper
Mathieu Blondel, Akinori Fujino, and Naonori Ueda.
Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex,
ICPR 2014, `URL <http://www.mblondel.org/publications/mblondel-icpr2014.pdf>`_
:param `unnormalized_arr`: point in n-dimensional space, shape `(n,)`
:return: projection of `unnormalized_arr` onto the (n-1)-dimensional probability simplex, shape `(n,)`
"""
unnormalized_arr = np.asarray(unnormalized_arr)
n = len(unnormalized_arr)
u = np.sort(unnormalized_arr)[::-1]
cssv = np.cumsum(u) - 1.0
ind = np.arange(1, n + 1)
cond = u - cssv / ind > 0
rho = ind[cond][-1]
theta = cssv[cond][-1] / float(rho)
return np.maximum(unnormalized_arr - theta, 0)
class RegressionToSimplex(BaseEstimator):
"""
A very simple regressor of probability distributions.
Internally, this class works by invoking an SVR regressor multioutput
followed by a mapping onto the probability simplex.
:param C: regularziation parameter for SVR
"""
def __init__(self, C=1):
self.C = C
def fit(self, X, y):
"""
Learns the correction
:param X: array-like of shape `(n_instances, n_classes)` with uncorrected prevalence vectors
:param y: array-like of shape `(n_instances, n_classes)` with true prevalence vectors
:return: self
"""
self.reg = MultiOutputRegressor(SVR(C=self.C), n_jobs=-1)
self.reg.fit(X, y)
return self
def predict(self, X):
"""
Corrects the a vector of prevalence values
:param X: array-like of shape `(n_classes,)` with one vector of uncorrected prevalence values
:return: array-like of shape `(n_classes,)` with one vector of corrected prevalence values
"""
y_ = self.reg.predict(X)
y_ = np.asarray([projection_simplex_sort(y_i) for y_i in y_])
return y_
class KDEyRegressor(BaseQuantifier):
"""
This class implements a regressor-based correction on top of a quantifier.
The quantifier is taken to be KDEy-ML, which is considered to be already trained (this
method simply loads a pickled object).
The method then optimizes a regressor that corrects prevalence vectors using the
validation samples as training data.
The regressor is based on a multioutput SVR and relies on a post-processing to guarantee
that the output lies on the probability simplex (see also RegressionToSimplex)
"""
def __init__(self, kde_path, Cs=np.logspace(-3,3,7)):
self.kde_path = kde_path
self.Cs = Cs
def fit(self, val_data: AbstractProtocol):
print(f'loading kde from {self.kde_path}')
self.kdey = pickle.load(open(self.kde_path, 'rb'))
print('representing val data with kde')
pbar = tqdm(val_data(), total=val_data.total())
Xs, Ys = [], []
for sample, prev in pbar:
prev_hat = self.kdey.quantify(sample)
Xs.append(prev_hat)
Ys.append(prev)
Xs = np.asarray(Xs)
Ys = np.asarray(Ys)
def scorer(estimator, X, y):
y_hat = estimator.predict(X)
md = normalized_match_distance(y, y_hat)
return (-md)
grid = {'C': self.Cs}
optim = GridSearchCV(
RegressionToSimplex(), param_grid=grid, scoring=scorer, verbose=0, cv=10, n_jobs=64
).fit(Xs, Ys)
self.regressor = optim.best_estimator_
return self
def quantify(self, instances):
prev_hat = self.kdey.quantify(instances)
return self.regressor.predict([prev_hat])[0]
if __name__ == '__main__':
train, gen_val, _ = fetch_lequa2024(task='T3', data_home='./data', merge_T3=True)
kdey_r = KDEyRegressor('./models/T3/KDEy-ML.pkl')
kdey_r.fit(gen_val)
pickle.dump(kdey_r, open('./models/T3/KDEyRegressor.pkl', 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

56
LeQua2024/run_baselines.sh Executable file
View File

@ -0,0 +1,56 @@
#!/bin/bash
set -x
# T1: binary (n=2)
# T2: multiclass (n=28)
# T3: ordinal (n=5)
# T4: covariante shift (n=2)
# preparing the environment: downloads the official LeQua 2024 scripts (only once and for all)
SCRIPTS_URL=https://github.com/HLT-ISTI/LeQua2024_scripts/archive/refs/heads/main.zip
# download and unzip the LeQua 2024 scripts
if [ ! -d "./scripts" ]; then
echo "Downloading $SCRIPTS_URL into ./scripts"
wget -qO scripts.zip "$SCRIPTS_URL"
unzip -q scripts.zip
mv "LeQua2024_scripts-main" "scripts"
rm scripts.zip
echo "[Done]"
else
echo "LeQua 2024 scripts already exists"
fi
for task in T1 T2 T3 T4 ; do
PYTHONPATH=.:scripts/:.. python3 baselines.py $task data/
TEST_SAMPLES=data/lequa2024/$task/public/test_samples
for pickledmodel in models/$task/*.pkl ; do
model=$(basename "$pickledmodel" .pkl)
PREDICTIONS=predictions/$model/task_"${task: -1}".csv
PYTHONPATH=.:scripts/:.. python3 predict.py models/$task/$model.pkl $TEST_SAMPLES $PREDICTIONS
done
done
echo "generating submission files for codalab in folder ./submission_files"
mkdir -p submission_files
for modelname in predictions/* ; do
modelname=$(basename "$modelname")
echo "modelname is $modelname"
for taskname in predictions/$modelname/*.csv ; do
taskname=$(basename "$taskname")
submission_name=submission_files/"$modelname"_"$taskname".zip
rm -f $submission_name
echo "zipping results for $modelname and task $taskname"
zip -j $submission_name predictions/$modelname/$taskname
done
done
echo "[Done]"

View File

@ -0,0 +1,120 @@
import os
from os.path import join
import pandas as pd
import quapy as qp
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
os.chdir('/home/moreo/QuaPy/LeQua2024/util_scripts')
print(os.getcwd())
qp.environ['SAMPLE_SIZE']=250
true_prevs_path = '../TruePrevalences/T4.test_prevalences/T4/public/test_prevalences.txt'
domain_prevs_path = '../T4_domain_prevalence/test_domain_prevalences.txt'
folder = '../Results_CODALAB_2024/extracted/TASK_4'
def load_result_file(path):
df = pd.read_csv(path, index_col=0)
id = df.index.to_numpy()
prevs = df.values
return id, prevs
method_files = [
#'ACC.csv',
#'CC.csv',
#'DistMatching-y.csv',
#'KDEy.csv',
#'PACC.csv',
'PCC.csv',
#'SLD.csv',
#'TeamCUFE.csv',
#'TeamGMNet.csv',
'tobiaslotz.csv'
]
method_names_nice={
'DistMatching-y': 'DM',
'TeamGMNet': 'UniOviedo(Team1)',
'tobiaslotz': 'Lamarr'
}
desired_order=[
'Lamarr',
'SLD',
'DM',
'KDEy',
'UniOviedo(Team1)'
]
desired_order=[
'PCC', 'Lamarr'
]
# load the true values (sentiment prevalence, domain prevalence)
true_id, true_prevs = load_result_file(true_prevs_path)
dom_id, dom_prevs = load_result_file(domain_prevs_path)
assert (true_id == dom_id).all(), 'unmatched files'
# define the loss for evaluation
error_name = 'RAE'
error_log = False
if error_name == 'RAE':
err_function_ = qp.error.rae
elif error_name == 'AE':
err_function_ = qp.error.ae
else:
raise ValueError()
if error_log:
error_name = f'log({error_name})'
err_function = lambda x,y: np.log(err_function_(x,y))
else:
err_function = err_function_
# load the participant and baseline results
errors = {}
for method_file in method_files:
method_name = method_file.replace('.csv', '')
id, method_prevs = load_result_file(join(folder, method_file))
print(method_file)
assert (true_id == id).all(), f'unmatched files for {method_file}'
method_error = err_function(true_prevs, method_prevs)
method_name = method_names_nice.get(method_name, method_name)
errors[method_name] = method_error
dom_A_prevs = dom_prevs[:,0]
n_bins = 5
bins = np.linspace(dom_A_prevs.min(), dom_A_prevs.max(), n_bins + 1)
# Crear un DataFrame para los datos
df = pd.DataFrame({'dom_A_prevs': dom_A_prevs})
for method, err in errors.items():
df[method] = err
# Asignar cada valor de dom_A_prevs a un bin
df['bin'] = pd.cut(df['dom_A_prevs'], bins=bins, labels=False, include_lowest=True)
# Convertir el DataFrame a formato largo
df_long = df.melt(id_vars=['dom_A_prevs', 'bin'], value_vars=errors.keys(), var_name='Método', value_name='Error')
# Crear etiquetas de los bins para el eje X
bin_labels = [f"[{bins[i]:.3f}-{bins[i + 1]:.3f}" + (']' if i == n_bins-1 else ')') for i in range(n_bins)]
df_long['bin_label'] = df_long['bin'].map(dict(enumerate(bin_labels)))
# Crear el gráfico de boxplot en Seaborn
plt.figure(figsize=(14, 8))
sns.boxplot(x='bin', y='Error', hue='Método', data=df_long, palette='Set2', showfliers=False, hue_order=desired_order)
# Configurar etiquetas del eje X con los rangos de los bins
plt.xticks(ticks=range(n_bins), labels=bin_labels, rotation=0)
plt.xlabel("Prevalence of Books")
plt.ylabel(error_name)
#plt.title("Boxplots de Errores por Método dentro de Bins de dom_A_prevs")
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
#plt.show()
plt.savefig(f'./t4_{error_name}_pcc.png')

View File

@ -0,0 +1,113 @@
import os
from os.path import join
import pandas as pd
from LeQua2024.scripts.data import load_vector_documents
from LeQua2024.scripts.constants import SAMPLE_SIZE
from quapy.data.base import LabelledCollection
import sys
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), './')))
#from LeQua2024.scripts import constants
#from LeQua2024._lequa2024 import fetch_lequa2024
import quapy as qp
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
from pathlib import Path
import glob
from commons import *
for TASK in [1,2,4]:
qp.environ['SAMPLE_SIZE']=SAMPLE_SIZE[f'T{TASK}']
true_prevs_path = f'../TruePrevalences/T{TASK}.test_prevalences/T{TASK}/public/test_prevalences.txt'
folder = F'../Results_CODALAB_2024/extracted/TASK_{TASK}'
method_files = glob.glob(f"{folder}/*.csv")
desired_order = desired_order_dict[TASK]
# load the true values (sentiment prevalence, domain prevalence)
true_id, true_prevs = load_result_file(true_prevs_path)
# define the loss for evaluation
error_name = 'RAE'
error_log = False
if error_name == 'RAE':
err_function_ = qp.error.rae
elif error_name == 'AE':
err_function_ = qp.error.ae
else:
raise ValueError()
if error_log:
error_name = f'log({error_name})'
err_function = lambda x,y: np.log(err_function_(x,y))
else:
err_function = err_function_
#train_prevalence = fetch_lequa2024(task=f'T{TASK}', data_home='./data')
train = LabelledCollection.load(f'../data/lequa2024/T{TASK}/public/training_data.txt', loader_func=load_vector_documents)
train_prev = train.prevalence()
#train_prev = np.tile(train_prev, (len(true_id),1))
from quapy.plot import error_by_drift, binary_diagonal
# load the participant and baseline results
method_names, estim_prevs = [], []
for method_file in method_files:
method_name = Path(method_file).name.replace('.csv', '')
# if method_name in exclude_methods:
# continue
id, method_prevs = load_result_file(join(folder, method_name+'.csv'))
assert (true_id == id).all(), f'unmatched files for {method_file}'
method_name = method_names_nice.get(method_name, method_name)
if method_name not in desired_order:
print(f'method {method_name} unknown')
raise ValueError()
method_names.append(method_name)
estim_prevs.append(method_prevs)
plt.rcParams['figure.figsize'] = [14, 6]
plt.rcParams['figure.dpi'] = 200
plt.rcParams['font.size'] = 15
true_prevs = [true_prevs]*len(method_names)
savepath = f'./t{TASK}_diagonal.png'
if TASK in [1,4]:
binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
train_prev=train.prevalence(), savepath=savepath, method_order=desired_order)
box_to_ancor={
1: (0.88,0.1),
2: (0.9,0.15),
4: (0.9, 0.15),
}
tr_prevs =[train.prevalence()]*len(method_names)
savepath = f'./t{TASK}_{error_name}_pps.png'
binary=TASK in [1,4]
if binary:
print(f'{TASK=} has positive prevalence = {train.prevalence()[1]}')
error_by_drift(method_names,
true_prevs,
estim_prevs,
tr_prevs,
title=None,
y_error_name='rae',
x_error_name='bias_binary' if binary else 'ae',
x_axis_title=f'PPS between training set and test sample (in terms of bias)' if binary else None,
show_std=False,
n_bins=25,
logscale=True if binary else False,
show_density=True,
method_order=desired_order,
vlines=list(train.prevalence()) if binary else None,
bbox_to_anchor=box_to_ancor[TASK],
savepath=savepath)

View File

@ -14,7 +14,7 @@ for facilitating the analysis and interpretation of the experimental results.
### Last updates:
* Version 0.1.8 is released! major changes can be consulted [here](CHANGE_LOG.txt).
* A detailed wiki is available [here](https://github.com/HLT-ISTI/QuaPy/wiki)
* A detailed documentation is now available [here](https://hlt-isti.github.io/QuaPy/)
* The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/build/html/modules.html)
### Installation
@ -78,12 +78,10 @@ quantification methods based on structured output learning, HDy, QuaNet, quantif
* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.).
* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.).
* Datasets frequently used in quantification (textual and numeric), including:
* 32 UCI Machine Learning binary datasets.
* 5 UCI Machine Learning multiclass datasets (_new in v0.1.8!_).
* 32 UCI Machine Learning datasets.
* 11 Twitter quantification-by-sentiment datasets.
* 3 product reviews quantification-by-sentiment datasets.
* 4 tasks from LeQua competition (_new in v0.1.7!_)
* IFCB dataset of plankton water samples (_new in v0.1.8!_).
* Native support for binary and single-label multiclass quantification scenarios.
* Model selection functionality that minimizes quantification-oriented loss functions.
* Visualization tools for analysing the experimental results.
@ -97,7 +95,6 @@ quantification methods based on structured output learning, HDy, QuaNet, quantif
* tqdm
* pandas, xlrd
* matplotlib
* ucimlrepo
## Contributing
@ -105,7 +102,7 @@ In case you want to contribute improvements to quapy, please generate pull reque
## Documentation
The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/build/html/index.html).
The [developer API documentation](https://hlt-isti.github.io/QuaPy/build/html/modules.html) is available [here](https://hlt-isti.github.io/QuaPy/build/html/index.html).
Check out our [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki), in which many examples
are provided:
@ -120,10 +117,4 @@ are provided:
## Acknowledgments:
<img src="logo/SoBigData.png" alt="SoBigData++" width="250"/>
<img src="logo/LogoQuaDaSh.png" alt="QuaDaSh" width="250"/>
<img src="logo/NextGenerationEU.jpg" alt="QuaDaSh" width="250"/>
<img src="SoBigData.png" alt="SoBigData++" width="250"/>

View File

Before

Width:  |  Height:  |  Size: 128 KiB

After

Width:  |  Height:  |  Size: 128 KiB

View File

@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -1,123 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=22607128"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Overview: module code</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>All modules for which code is available</h1>
<ul><li><a href="quapy/classification/calibration.html">quapy.classification.calibration</a></li>
<li><a href="quapy/classification/methods.html">quapy.classification.methods</a></li>
<li><a href="quapy/classification/neural.html">quapy.classification.neural</a></li>
<li><a href="quapy/classification/svmperf.html">quapy.classification.svmperf</a></li>
<li><a href="quapy/data/base.html">quapy.data.base</a></li>
<li><a href="quapy/data/datasets.html">quapy.data.datasets</a></li>
<li><a href="quapy/data/preprocessing.html">quapy.data.preprocessing</a></li>
<li><a href="quapy/data/reader.html">quapy.data.reader</a></li>
<li><a href="quapy/error.html">quapy.error</a></li>
<li><a href="quapy/evaluation.html">quapy.evaluation</a></li>
<li><a href="quapy/functional.html">quapy.functional</a></li>
<li><a href="quapy/method/_kdey.html">quapy.method._kdey</a></li>
<li><a href="quapy/method/_neural.html">quapy.method._neural</a></li>
<li><a href="quapy/method/_threshold_optim.html">quapy.method._threshold_optim</a></li>
<li><a href="quapy/method/aggregative.html">quapy.method.aggregative</a></li>
<li><a href="quapy/method/base.html">quapy.method.base</a></li>
<li><a href="quapy/method/meta.html">quapy.method.meta</a></li>
<li><a href="quapy/method/non_aggregative.html">quapy.method.non_aggregative</a></li>
<li><a href="quapy/model_selection.html">quapy.model_selection</a></li>
<li><a href="quapy/plot.html">quapy.plot</a></li>
<li><a href="quapy/protocol.html">quapy.protocol</a></li>
<li><a href="quapy/util.html">quapy.util</a></li>
</ul>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,351 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.classification.calibration &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.classification.calibration</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.classification.calibration</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">abstention.calibration</span> <span class="kn">import</span> <span class="n">NoBiasVectorScaling</span><span class="p">,</span> <span class="n">TempScaling</span><span class="p">,</span> <span class="n">VectorScaling</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">clone</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">cross_val_predict</span><span class="p">,</span> <span class="n">train_test_split</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="c1"># Wrappers of calibration defined by Alexandari et al. in paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;</span>
<span class="c1"># requires &quot;pip install abstension&quot;</span>
<span class="c1"># see https://github.com/kundajelab/abstention</span>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifier">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifier">[docs]</a>
<span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifier</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for (re)calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari, A., Kundaje, A., &amp; Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration</span>
<span class="sd"> is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR.</span>
<span class="sd"> &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase">[docs]</a>
<span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifierBase</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">RecalibratedProbabilisticClassifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies a (re)calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari et al. paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_.</span>
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
<span class="sd"> :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory)</span>
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
<span class="sd"> training set afterwards. Default value is 5.</span>
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None</span>
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">calibrator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">calibrator</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the calibration for the probabilistic classifier.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">if</span> <span class="n">k</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;wrong value for val_split: the number of folds must be &gt; 2&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_cv</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="mi">0</span> <span class="o">&lt;</span> <span class="n">k</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;wrong value for val_split: the proportion of validation documents must be in (0,1)&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_tr_val</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span></div>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_cv">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv">[docs]</a>
<span class="k">def</span> <span class="nf">fit_cv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all</span>
<span class="sd"> training instances via cross-validation, and then retrains the classifier on all training instances.</span>
<span class="sd"> The posterior probabilities thus generated are used for calibrating the outputs of the classifier.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;predict_proba&#39;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">y</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_tr_val">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val">[docs]</a>
<span class="k">def</span> <span class="nf">fit_tr_val</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the calibration in a train/val-split manner, i.e.t, it partitions the training instances into a</span>
<span class="sd"> training and a validation set, and then uses the training samples to learn classifier which is then used</span>
<span class="sd"> to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate</span>
<span class="sd"> the classifier. The classifier is not retrained on the whole dataset.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xva</span><span class="p">,</span> <span class="n">ytr</span><span class="p">,</span> <span class="n">yva</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">ytr</span><span class="p">)</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">Xva</span><span class="p">)</span>
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">yva</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">yva</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict">[docs]</a>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts class labels for the data instances in `X`</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
<span class="sd"> :return: array-like of shape `(n_samples,)` with the class label predictions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict_proba">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates posterior probabilities for the data instances in `X`</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the classes on which the classifier has been trained on</span>
<span class="sd"> :return: array-like of shape `(n_classes)`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span></div>
<div class="viewcode-block" id="NBVSCalibration">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.NBVSCalibration">[docs]</a>
<span class="k">class</span> <span class="nc">NBVSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari et al. paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_:</span>
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p</span>
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
<span class="sd"> training set afterwards. Default value is 5.</span>
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">NoBiasVectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
<div class="viewcode-block" id="BCTSCalibration">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.BCTSCalibration">[docs]</a>
<span class="k">class</span> <span class="nc">BCTSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari et al. paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_:</span>
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p</span>
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
<span class="sd"> training set afterwards. Default value is 5.</span>
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">,</span> <span class="n">bias_positions</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
<div class="viewcode-block" id="TSCalibration">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.TSCalibration">[docs]</a>
<span class="k">class</span> <span class="nc">TSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari et al. paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_:</span>
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p</span>
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
<span class="sd"> training set afterwards. Default value is 5.</span>
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
<div class="viewcode-block" id="VSCalibration">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.VSCalibration">[docs]</a>
<span class="k">class</span> <span class="nc">VSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in</span>
<span class="sd"> `Alexandari et al. paper &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_:</span>
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p</span>
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
<span class="sd"> training set afterwards. Default value is 5.</span>
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">VectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,220 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.classification.methods &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.classification.methods</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.classification.methods</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <span class="n">TruncatedSVD</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<div class="viewcode-block" id="LowRankLogisticRegression">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression">[docs]</a>
<span class="k">class</span> <span class="nc">LowRankLogisticRegression</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)</span>
<span class="sd"> that also generates embedded inputs (i.e., that implements `transform`), as those required for</span>
<span class="sd"> :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating</span>
<span class="sd"> :class:`quapy.method.neural.QuaNet` on array-like real-valued instances.</span>
<span class="sd"> The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`</span>
<span class="sd"> while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.</span>
<span class="sd"> :param n_components: the number of principal components to retain</span>
<span class="sd"> :param kwargs: parameters for the</span>
<span class="sd"> `Logistic Regression &lt;https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html&gt;`__ classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_components</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">n_components</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<div class="viewcode-block" id="LowRankLogisticRegression.get_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get hyper-parameters for this estimator.</span>
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;n_components&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">}</span>
<span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
<span class="k">return</span> <span class="n">params</span></div>
<div class="viewcode-block" id="LowRankLogisticRegression.set_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the parameters of this estimator.</span>
<span class="sd"> :param parameters: a `**kwargs` dictionary with the estimator parameters for</span>
<span class="sd"> `Logistic Regression &lt;https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html&gt;`__</span>
<span class="sd"> and eventually also `n_components` for `TruncatedSVD`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">params_</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;n_components&#39;</span> <span class="ow">in</span> <span class="n">params_</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">params_</span><span class="p">[</span><span class="s1">&#39;n_components&#39;</span><span class="p">]</span>
<span class="k">del</span> <span class="n">params_</span><span class="p">[</span><span class="s1">&#39;n_components&#39;</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params_</span><span class="p">)</span></div>
<div class="viewcode-block" id="LowRankLogisticRegression.fit">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fit the model according to the given training data. The fit consists of</span>
<span class="sd"> fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the instances</span>
<span class="sd"> :param y: array-like of shape `(n_samples, n_classes)` with the class labels</span>
<span class="sd"> :return: `self`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">nF</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">nF</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="n">TruncatedSVD</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="LowRankLogisticRegression.predict">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict">[docs]</a>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts labels for the instances `X` embedded into the low-rank space.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
<span class="sd"> instances in `X`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
<div class="viewcode-block" id="LowRankLogisticRegression.predict_proba">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts posterior probabilities for the instances `X` embedded into the low-rank space.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
<div class="viewcode-block" id="LowRankLogisticRegression.transform">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.transform">[docs]</a>
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if</span>
<span class="sd"> `n_components` &gt;= `X.shape[1]`.</span>
<span class="sd"> </span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to embed</span>
<span class="sd"> :return: array-like of shape `(n_samples, n_components)` with the embedded instances</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">X</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,715 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.classification.neural &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.classification.neural</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.classification.neural</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">torch</span>
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">accuracy_score</span><span class="p">,</span> <span class="n">f1_score</span>
<span class="kn">from</span> <span class="nn">torch.nn.utils.rnn</span> <span class="kn">import</span> <span class="n">pad_sequence</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
<div class="viewcode-block" id="NeuralClassifierTrainer">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer">[docs]</a>
<span class="k">class</span> <span class="nc">NeuralClassifierTrainer</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains a neural network for text classification.</span>
<span class="sd"> :param net: an instance of `TextClassifierNet` implementing the forward pass</span>
<span class="sd"> :param lr: learning rate (default 1e-3)</span>
<span class="sd"> :param weight_decay: weight decay (default 0)</span>
<span class="sd"> :param patience: number of epochs that do not show any improvement in validation</span>
<span class="sd"> to wait before applying early stop (default 10)</span>
<span class="sd"> :param epochs: maximum number of training epochs (default 200)</span>
<span class="sd"> :param batch_size: batch size for training (default 64)</span>
<span class="sd"> :param batch_size_test: batch size for test (default 512)</span>
<span class="sd"> :param padding_length: maximum number of tokens to consider in a document (default 300)</span>
<span class="sd"> :param device: specify &#39;cpu&#39; (default) or &#39;cuda&#39; for enabling gpu</span>
<span class="sd"> :param checkpointpath: where to store the parameters of the best model found so far</span>
<span class="sd"> according to the evaluation in the held-out validation split (default &#39;../checkpoint/classifier_net.dat&#39;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">net</span><span class="p">:</span> <span class="s1">&#39;TextClassifierNet&#39;</span><span class="p">,</span>
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
<span class="n">weight_decay</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">epochs</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span>
<span class="n">batch_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
<span class="n">batch_size_test</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span>
<span class="n">padding_length</span><span class="o">=</span><span class="mi">300</span><span class="p">,</span>
<span class="n">device</span><span class="o">=</span><span class="s1">&#39;cuda&#39;</span><span class="p">,</span>
<span class="n">checkpointpath</span><span class="o">=</span><span class="s1">&#39;../checkpoint/classifier_net.dat&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">TextClassifierNet</span><span class="p">),</span> <span class="sa">f</span><span class="s1">&#39;net is not an instance of </span><span class="si">{</span><span class="n">TextClassifierNet</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">vocabulary_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;lr&#39;</span><span class="p">:</span> <span class="n">lr</span><span class="p">,</span>
<span class="s1">&#39;weight_decay&#39;</span><span class="p">:</span> <span class="n">weight_decay</span><span class="p">,</span>
<span class="s1">&#39;patience&#39;</span><span class="p">:</span> <span class="n">patience</span><span class="p">,</span>
<span class="s1">&#39;epochs&#39;</span><span class="p">:</span> <span class="n">epochs</span><span class="p">,</span>
<span class="s1">&#39;batch_size&#39;</span><span class="p">:</span> <span class="n">batch_size</span><span class="p">,</span>
<span class="s1">&#39;batch_size_test&#39;</span><span class="p">:</span> <span class="n">batch_size_test</span><span class="p">,</span>
<span class="s1">&#39;padding_length&#39;</span><span class="p">:</span> <span class="n">padding_length</span><span class="p">,</span>
<span class="s1">&#39;device&#39;</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span> <span class="o">=</span> <span class="n">checkpointpath</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[NeuralNetwork running on </span><span class="si">{</span><span class="n">device</span><span class="si">}</span><span class="s1">]&#39;</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">checkpointpath</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<div class="viewcode-block" id="NeuralClassifierTrainer.reset_net_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.reset_net_params">[docs]</a>
<span class="k">def</span> <span class="nf">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Reinitialize the network parameters</span>
<span class="sd"> :param vocab_size: the size of the vocabulary</span>
<span class="sd"> :param n_classes: the number of target classes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">[</span><span class="s1">&#39;device&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">xavier_uniform</span><span class="p">()</span></div>
<div class="viewcode-block" id="NeuralClassifierTrainer.get_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get hyper-parameters for this estimator</span>
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">(),</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">}</span></div>
<div class="viewcode-block" id="NeuralClassifierTrainer.set_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Set the parameters of this trainer and the learner it is training.</span>
<span class="sd"> In this current version, parameter names for the trainer and learner should</span>
<span class="sd"> be disjoint.</span>
<span class="sd"> :param params: a `**kwargs` dictionary with the parameters</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
<span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;the use of parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is ambiguous since it can refer to &#39;</span>
<span class="sa">f</span><span class="s1">&#39;a parameters of the Trainer or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is not valid&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span><span class="p">:</span>
<span class="n">trainer_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">learner_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="n">trainer_hyperparams</span>
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="n">learner_hyperparams</span> </div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Gets the device in which the network is allocated</span>
<span class="sd"> :return: device</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">device</span>
<span class="k">def</span> <span class="nf">_train_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;loss&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;acc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;f1&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">&#39;macro&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_test_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;loss&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;acc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
<span class="n">status</span><span class="p">[</span><span class="s2">&quot;f1&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">&#39;macro&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__update_progress_bar</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">] training epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;tr-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;tr&quot;</span><span class="p">][</span><span class="s2">&quot;loss&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;tr-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;tr&quot;</span><span class="p">][</span><span class="s2">&quot;acc&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% &#39;</span>
<span class="sa">f</span><span class="s1">&#39;tr-macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;tr&quot;</span><span class="p">][</span><span class="s2">&quot;f1&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% &#39;</span>
<span class="sa">f</span><span class="s1">&#39;patience=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;val-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;va&quot;</span><span class="p">][</span><span class="s2">&quot;loss&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;val-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;va&quot;</span><span class="p">][</span><span class="s2">&quot;acc&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% &#39;</span>
<span class="sa">f</span><span class="s1">&#39;macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;va&quot;</span><span class="p">][</span><span class="s2">&quot;f1&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">%&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="NeuralClassifierTrainer.fit">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the model according to the given training data.</span>
<span class="sd"> :param instances: list of lists of indexed tokens</span>
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
<span class="sd"> :param val_split: proportion of training documents to be taken as the validation set (default 0.3)</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">val_split</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">classes_</span>
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
<span class="n">train_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
<span class="n">opt</span><span class="p">[</span><span class="s1">&#39;batch_size&#39;</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;padding_length&#39;</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;device&#39;</span><span class="p">])</span>
<span class="n">valid_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">val</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">val</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
<span class="n">opt</span><span class="p">[</span><span class="s1">&#39;batch_size_test&#39;</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;padding_length&#39;</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;device&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;tr&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;loss&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;acc&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;f1&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">},</span>
<span class="s1">&#39;va&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;loss&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;acc&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;f1&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">}}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;lr&#39;</span><span class="p">],</span> <span class="n">weight_decay</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;weight_decay&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;patience&#39;</span><span class="p">],</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">with</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">&#39;epochs&#39;</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">))</span> <span class="k">as</span> <span class="n">pbar</span><span class="p">:</span>
<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="n">pbar</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">train_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;tr&#39;</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_test_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;va&#39;</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;va&#39;</span><span class="p">][</span><span class="s1">&#39;f1&#39;</span><span class="p">],</span> <span class="n">epoch</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;training ended by patience exhasted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;for epoch </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
<span class="k">break</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;performing one training pass over the validation set...&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;tr&#39;</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[done]&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="NeuralClassifierTrainer.predict">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict">[docs]</a>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts labels for the instances</span>
<span class="sd"> :param instances: list of lists of indexed tokens</span>
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
<span class="sd"> instances in `X`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">),</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
<div class="viewcode-block" id="NeuralClassifierTrainer.predict_proba">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts posterior probabilities for the instances</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
<span class="n">opt</span><span class="p">[</span><span class="s1">&#39;batch_size_test&#39;</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;padding_length&#39;</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;device&#39;</span><span class="p">]):</span>
<span class="n">posteriors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">xi</span><span class="p">))</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
<div class="viewcode-block" id="NeuralClassifierTrainer.transform">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.transform">[docs]</a>
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the embeddings of the instances</span>
<span class="sd"> :param instances: list of lists of indexed tokens</span>
<span class="sd"> :return: array-like of shape `(n_samples, embed_size)` with the embedded instances,</span>
<span class="sd"> where `embed_size` is defined by the classification network</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="n">embeddings</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
<span class="n">opt</span><span class="p">[</span><span class="s1">&#39;batch_size_test&#39;</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;padding_length&#39;</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;device&#39;</span><span class="p">]):</span>
<span class="n">embeddings</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">())</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="TorchDataset">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset">[docs]</a>
<span class="k">class</span> <span class="nc">TorchDataset</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transforms labelled instances into a Torch&#39;s :class:`torch.utils.data.DataLoader` object</span>
<span class="sd"> :param instances: list of lists of indexed tokens</span>
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">labels</span>
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="k">return</span> <span class="p">{</span><span class="s1">&#39;doc&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">],</span> <span class="s1">&#39;label&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span><span class="p">}</span>
<div class="viewcode-block" id="TorchDataset.asDataloader">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset.asDataloader">[docs]</a>
<span class="k">def</span> <span class="nf">asDataloader</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="p">,</span> <span class="n">pad_length</span><span class="p">,</span> <span class="n">device</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts the labelled collection into a Torch DataLoader with dynamic padding for</span>
<span class="sd"> the batch</span>
<span class="sd"> :param batch_size: batch size</span>
<span class="sd"> :param shuffle: whether or not to shuffle instances</span>
<span class="sd"> :param pad_length: the maximum length for the list of tokens (dynamic padding is</span>
<span class="sd"> applied, meaning that if the longest document in the batch is shorter than</span>
<span class="sd"> `pad_length`, then the batch is padded up to its length, and not to `pad_length`.</span>
<span class="sd"> :param device: whether to allocate tensors in cpu or in cuda</span>
<span class="sd"> :return: a :class:`torch.utils.data.DataLoader` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">collate</span><span class="p">(</span><span class="n">batch</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">LongTensor</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="s1">&#39;doc&#39;</span><span class="p">][:</span><span class="n">pad_length</span><span class="p">])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pad_sequence</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">padding_value</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;PAD_INDEX&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="n">targets</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="s1">&#39;label&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
<span class="k">if</span> <span class="n">targets</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">data</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">targets</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">targets</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">long</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="k">return</span> <span class="p">[</span><span class="n">data</span><span class="p">,</span> <span class="n">targets</span><span class="p">]</span>
<span class="n">torchDataset</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">torchDataset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">collate_fn</span><span class="o">=</span><span class="n">collate</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="TextClassifierNet">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet">[docs]</a>
<span class="k">class</span> <span class="nc">TextClassifierNet</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract Text classifier (`torch.nn.Module`)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="TextClassifierNet.document_embedding">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.document_embedding">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Embeds documents (i.e., performs the forward pass up to the</span>
<span class="sd"> next-to-last layer).</span>
<span class="sd"> :param x: a batch of instances, typically generated by a torch&#39;s `DataLoader`</span>
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
<span class="sd"> dimensionality of the embedding</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<div class="viewcode-block" id="TextClassifierNet.forward">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.forward">[docs]</a>
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Performs the forward pass.</span>
<span class="sd"> :param x: a batch of instances, typically generated by a torch&#39;s `DataLoader`</span>
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
<span class="sd"> :return: a tensor of shape `(n_instances, n_classes)` with the decision scores</span>
<span class="sd"> for each of the instances and classes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">doc_embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">doc_embedded</span><span class="p">)</span></div>
<div class="viewcode-block" id="TextClassifierNet.dimensions">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.dimensions">[docs]</a>
<span class="k">def</span> <span class="nf">dimensions</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gets the number of dimensions of the embedding space</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span></div>
<div class="viewcode-block" id="TextClassifierNet.predict_proba">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts posterior probabilities for the instances in `x`</span>
<span class="sd"> :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`</span>
<span class="sd"> where `n_instances` is the number of instances in the batch, and `pad_length`</span>
<span class="sd"> is length of the pad in the batch</span>
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span></div>
<div class="viewcode-block" id="TextClassifierNet.xavier_uniform">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.xavier_uniform">[docs]</a>
<span class="k">def</span> <span class="nf">xavier_uniform</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Performs Xavier initialization of the network parameters</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
<span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">p</span><span class="o">.</span><span class="n">requires_grad</span><span class="p">:</span>
<span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">xavier_uniform_</span><span class="p">(</span><span class="n">p</span><span class="p">)</span></div>
<div class="viewcode-block" id="TextClassifierNet.get_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.get_params">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get hyper-parameters for this estimator</span>
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the size of the vocabulary</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<div class="viewcode-block" id="LSTMnet">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet">[docs]</a>
<span class="k">class</span> <span class="nc">LSTMnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
<span class="sd"> Long Short Term Memory networks.</span>
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
<span class="sd"> :param n_classes: number of target classes</span>
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
<span class="sd"> :param lstm_class_nlayers: number of LSTM layers (default 1)</span>
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;embedding_size&#39;</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
<span class="s1">&#39;hidden_size&#39;</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
<span class="s1">&#39;repr_size&#39;</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
<span class="s1">&#39;lstm_class_nlayers&#39;</span><span class="p">:</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span>
<span class="s1">&#39;drop_p&#39;</span><span class="p">:</span> <span class="n">drop_p</span>
<span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">embedding_size</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span> <span class="n">dropout</span><span class="o">=</span><span class="n">drop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">set_size</span><span class="p">):</span>
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span>
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;lstm_class_nlayers&#39;</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">&#39;hidden_size&#39;</span><span class="p">])</span>
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">&#39;lstm_class_nlayers&#39;</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">&#39;hidden_size&#39;</span><span class="p">])</span>
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
<div class="viewcode-block" id="LSTMnet.document_embedding">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.document_embedding">[docs]</a>
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Embeds documents (i.e., performs the forward pass up to the</span>
<span class="sd"> next-to-last layer).</span>
<span class="sd"> :param x: a batch of instances, typically generated by a torch&#39;s `DataLoader`</span>
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
<span class="sd"> dimensionality of the embedding</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">rnn_output</span><span class="p">,</span> <span class="n">rnn_hidden</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embedded</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">__init_hidden</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">0</span><span class="p">]))</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
<span class="k">return</span> <span class="n">abstracted</span></div>
<div class="viewcode-block" id="LSTMnet.get_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get hyper-parameters for this estimator</span>
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the size of the vocabulary</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
<div class="viewcode-block" id="CNNnet">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet">[docs]</a>
<span class="k">class</span> <span class="nc">CNNnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
<span class="sd"> Convolutional Neural Networks.</span>
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
<span class="sd"> :param n_classes: number of target classes</span>
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
<span class="sd"> :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of</span>
<span class="sd"> consecutive tokens that each kernel covers</span>
<span class="sd"> :param stride: convolutional stride (default 1)</span>
<span class="sd"> :param stride: convolutional pad (default 0)</span>
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
<span class="n">kernel_heights</span><span class="o">=</span><span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">CNNnet</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;embedding_size&#39;</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
<span class="s1">&#39;hidden_size&#39;</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
<span class="s1">&#39;repr_size&#39;</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
<span class="s1">&#39;kernel_heights&#39;</span><span class="p">:</span><span class="n">kernel_heights</span><span class="p">,</span>
<span class="s1">&#39;stride&#39;</span><span class="p">:</span> <span class="n">stride</span><span class="p">,</span>
<span class="s1">&#39;drop_p&#39;</span><span class="p">:</span> <span class="n">drop_p</span>
<span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
<span class="n">in_channels</span> <span class="o">=</span> <span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">conv1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">conv2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">conv3</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">kernel_heights</span><span class="p">)</span> <span class="o">*</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__conv_block</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">,</span> <span class="n">conv_layer</span><span class="p">):</span>
<span class="n">conv_out</span> <span class="o">=</span> <span class="n">conv_layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span> <span class="c1"># conv_out.size() = (batch_size, out_channels, dim, 1)</span>
<span class="n">activation</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">conv_out</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span> <span class="c1"># activation.size() = (batch_size, out_channels, dim1)</span>
<span class="n">max_out</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max_pool1d</span><span class="p">(</span><span class="n">activation</span><span class="p">,</span> <span class="n">activation</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">2</span><span class="p">])</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># maxpool_out.size() = (batch_size, out_channels)</span>
<span class="k">return</span> <span class="n">max_out</span>
<div class="viewcode-block" id="CNNnet.document_embedding">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.document_embedding">[docs]</a>
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Embeds documents (i.e., performs the forward pass up to the</span>
<span class="sd"> next-to-last layer).</span>
<span class="sd"> :param input: a batch of instances, typically generated by a torch&#39;s `DataLoader`</span>
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
<span class="sd"> dimensionality of the embedding</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nb">input</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
<span class="nb">input</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># input.size() = (batch_size, 1, num_seq, embedding_length)</span>
<span class="n">max_out1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv1</span><span class="p">)</span>
<span class="n">max_out2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv2</span><span class="p">)</span>
<span class="n">max_out3</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv3</span><span class="p">)</span>
<span class="n">all_out</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">max_out1</span><span class="p">,</span> <span class="n">max_out2</span><span class="p">,</span> <span class="n">max_out3</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span> <span class="c1"># all_out.size() = (batch_size, num_kernels*out_channels)</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">all_out</span><span class="p">))</span> <span class="c1"># (batch_size, num_kernels*out_channels)</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
<span class="k">return</span> <span class="n">abstracted</span></div>
<div class="viewcode-block" id="CNNnet.get_params">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get hyper-parameters for this estimator</span>
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the size of the vocabulary</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,268 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.classification.svmperf &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.classification.svmperf</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.classification.svmperf</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">random</span>
<span class="kn">import</span> <span class="nn">shutil</span>
<span class="kn">import</span> <span class="nn">subprocess</span>
<span class="kn">import</span> <span class="nn">tempfile</span>
<span class="kn">from</span> <span class="nn">os</span> <span class="kn">import</span> <span class="n">remove</span><span class="p">,</span> <span class="n">makedirs</span>
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span><span class="p">,</span> <span class="n">exists</span>
<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">PIPE</span><span class="p">,</span> <span class="n">STDOUT</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span>
<span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">dump_svmlight_file</span>
<div class="viewcode-block" id="SVMperf">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf">[docs]</a>
<span class="k">class</span> <span class="nc">SVMperf</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A wrapper for the `SVM-perf package &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`__ by Thorsten Joachims.</span>
<span class="sd"> When using losses for quantification, the source code has to be patched. See</span>
<span class="sd"> the `installation documentation &lt;https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses&gt;`__</span>
<span class="sd"> for further details.</span>
<span class="sd"> References:</span>
<span class="sd"> * `Esuli et al.2015 &lt;https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0&gt;`__</span>
<span class="sd"> * `Barranquero et al.2015 &lt;https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X&gt;`__</span>
<span class="sd"> :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`</span>
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
<span class="sd"> :param verbose: set to True to print svm-perf std outputs</span>
<span class="sd"> :param loss: the loss to optimize for. Available losses are &quot;01&quot;, &quot;f1&quot;, &quot;kld&quot;, &quot;nkld&quot;, &quot;q&quot;, &quot;qacc&quot;, &quot;qf1&quot;, &quot;qgm&quot;, &quot;mae&quot;, &quot;mrae&quot;.</span>
<span class="sd"> :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory</span>
<span class="sd"> (temporal directories are automatically deleted)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># losses with their respective codes in svm_perf implementation</span>
<span class="n">valid_losses</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;01&#39;</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;f1&#39;</span><span class="p">:</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;kld&#39;</span><span class="p">:</span><span class="mi">12</span><span class="p">,</span> <span class="s1">&#39;nkld&#39;</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span> <span class="s1">&#39;q&#39;</span><span class="p">:</span><span class="mi">22</span><span class="p">,</span> <span class="s1">&#39;qacc&#39;</span><span class="p">:</span><span class="mi">23</span><span class="p">,</span> <span class="s1">&#39;qf1&#39;</span><span class="p">:</span><span class="mi">24</span><span class="p">,</span> <span class="s1">&#39;qgm&#39;</span><span class="p">:</span><span class="mi">25</span><span class="p">,</span> <span class="s1">&#39;mae&#39;</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span> <span class="s1">&#39;mrae&#39;</span><span class="p">:</span><span class="mi">27</span><span class="p">}</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">svmperf_base</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;01&#39;</span><span class="p">,</span> <span class="n">host_folder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">),</span> <span class="sa">f</span><span class="s1">&#39;path </span><span class="si">{</span><span class="n">svmperf_base</span><span class="si">}</span><span class="s1"> does not seem to point to a valid path&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span> <span class="o">=</span> <span class="n">svmperf_base</span>
<span class="bp">self</span><span class="o">.</span><span class="n">C</span> <span class="o">=</span> <span class="n">C</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
<span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span>
<span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="o">=</span> <span class="n">host_folder</span>
<span class="c1"># def set_params(self, **parameters):</span>
<span class="c1"># &quot;&quot;&quot;</span>
<span class="c1"># Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported</span>
<span class="c1">#</span>
<span class="c1"># :param parameters: a `**kwargs` dictionary `{&#39;C&#39;: &lt;float&gt;}`</span>
<span class="c1"># &quot;&quot;&quot;</span>
<span class="c1"># assert sorted(list(parameters.keys())) == [&#39;C&#39;, &#39;loss&#39;], \</span>
<span class="c1"># &#39;currently, only the C and loss parameters are supported&#39;</span>
<span class="c1"># self.C = parameters.get(&#39;C&#39;, self.C)</span>
<span class="c1"># self.loss = parameters.get(&#39;loss&#39;, self.loss)</span>
<span class="c1">#</span>
<span class="c1"># def get_params(self, deep=True):</span>
<span class="c1"># return {&#39;C&#39;: self.C, &#39;loss&#39;: self.loss}</span>
<div class="viewcode-block" id="SVMperf.fit">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains the SVM for the multivariate performance loss</span>
<span class="sd"> :param X: training instances</span>
<span class="sd"> :param y: a binary vector of labels</span>
<span class="sd"> :return: `self`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="ow">in</span> <span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;unsupported loss </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="si">}</span><span class="s1">, valid ones are </span><span class="si">{</span><span class="nb">list</span><span class="p">(</span><span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">&#39;svm_perf_learn&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">&#39;svm_perf_classify&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span> <span class="o">=</span> <span class="s1">&#39;-w 3 -l &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span> <span class="o">=</span> <span class="s1">&#39;-c &#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">C</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes_</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
<span class="c1"># this would allow to run parallel instances of predict</span>
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">&#39;svmperfprocess&#39;</span><span class="o">+</span><span class="s1">&#39;-&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># tmp dir are removed after the fit terminates in multiprocessing...</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">suffix</span><span class="o">=</span><span class="n">random_code</span><span class="p">)</span><span class="o">.</span><span class="n">name</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span> <span class="o">+</span> <span class="n">random_code</span><span class="p">)</span>
<span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">&#39;model-&#39;</span><span class="o">+</span><span class="n">random_code</span><span class="p">)</span>
<span class="n">traindat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;train-</span><span class="si">{</span><span class="n">random_code</span><span class="si">}</span><span class="s1">.dat&#39;</span><span class="p">)</span>
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">&#39; &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">])</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[Running]&#39;</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">))</span>
<span class="n">remove</span><span class="p">(</span><span class="n">traindat</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="SVMperf.predict">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.predict">[docs]</a>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predicts labels for the instances `X`</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
<span class="sd"> instances in `X`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">confidence_scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">decision_function</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="p">(</span><span class="n">confidence_scores</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span> <span class="o">*</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">predictions</span></div>
<div class="viewcode-block" id="SVMperf.decision_function">
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.decision_function">[docs]</a>
<span class="k">def</span> <span class="nf">decision_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluate the decision function for the samples in `X`.</span>
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify</span>
<span class="sd"> :param y: unused</span>
<span class="sd"> :return: array-like of shape `(n_samples,)` containing the decision scores of the instances</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;tmpdir&#39;</span><span class="p">),</span> <span class="s1">&#39;predict called before fit&#39;</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;model directory corrupted&#39;</span>
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">),</span> <span class="s1">&#39;model not found&#39;</span>
<span class="k">if</span> <span class="n">y</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="c1"># in order to allow for parallel runs of predict, a random code is assigned</span>
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">&#39;-&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
<span class="n">predictions_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">&#39;predictions&#39;</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">&#39;.dat&#39;</span><span class="p">)</span>
<span class="n">testdat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">&#39;test&#39;</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">&#39;.dat&#39;</span><span class="p">)</span>
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">&#39; &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions_path</span><span class="p">])</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[Running]&#39;</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">))</span>
<span class="n">scores</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">loadtxt</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
<span class="n">remove</span><span class="p">(</span><span class="n">testdat</span><span class="p">)</span>
<span class="n">remove</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
<span class="k">return</span> <span class="n">scores</span></div>
<span class="k">def</span> <span class="fm">__del__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;tmpdir&#39;</span><span class="p">):</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,165 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data._ifcb &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data._ifcb</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data._ifcb</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
<div class="viewcode-block" id="IFCBTrainSamplesFromDir">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir">[docs]</a>
<span class="k">class</span> <span class="nc">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">classes</span><span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes</span> <span class="o">=</span> <span class="n">classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">path_dir</span><span class="p">):</span>
<span class="k">if</span> <span class="n">filename</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;.csv&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">sample</span><span class="p">))</span>
<span class="c1"># all columns but the first where we get the class</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span>
<div class="viewcode-block" id="IFCBTrainSamplesFromDir.total">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
<span class="sd"> :return: The number of training samples to generate.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="IFCBTestSamples">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples">[docs]</a>
<span class="k">class</span> <span class="nc">IFCBTestSamples</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path_dir</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">))</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">for</span> <span class="n">_</span><span class="p">,</span> <span class="n">test_sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
<span class="c1">#Load the sample from disk</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">test_sample</span><span class="p">[</span><span class="s1">&#39;sample&#39;</span><span class="p">]</span><span class="o">+</span><span class="s1">&#39;.csv&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">test_sample</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span>
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">prevalences</span>
<div class="viewcode-block" id="IFCBTestSamples.total">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
<span class="sd"> :return: The number of test samples to generate.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">index</span><span class="p">)</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,307 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data._lequa2022 &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data._lequa2022</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data._lequa2022</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Union</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
<span class="n">DEV_SAMPLES</span> <span class="o">=</span> <span class="mi">1000</span>
<span class="n">TEST_SAMPLES</span> <span class="o">=</span> <span class="mi">5000</span>
<span class="n">ERROR_TOL</span> <span class="o">=</span> <span class="mf">1E-3</span>
<div class="viewcode-block" id="load_category_map">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_category_map">[docs]</a>
<span class="k">def</span> <span class="nf">load_category_map</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">cat2code</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">fin</span><span class="p">:</span>
<span class="n">category</span><span class="p">,</span> <span class="n">code</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
<span class="n">cat2code</span><span class="p">[</span><span class="n">category</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">code</span><span class="p">)</span>
<span class="n">code2cat</span> <span class="o">=</span> <span class="p">[</span><span class="n">cat</span> <span class="k">for</span> <span class="n">cat</span><span class="p">,</span> <span class="n">code</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">cat2code</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">])]</span>
<span class="k">return</span> <span class="n">cat2code</span><span class="p">,</span> <span class="n">code2cat</span></div>
<div class="viewcode-block" id="load_raw_documents">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_raw_documents">[docs]</a>
<span class="k">def</span> <span class="nf">load_raw_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="n">documents</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s2">&quot;text&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
<span class="n">labels</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="s2">&quot;label&quot;</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
<span class="n">labels</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s2">&quot;label&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
<span class="k">return</span> <span class="n">documents</span><span class="p">,</span> <span class="n">labels</span></div>
<div class="viewcode-block" id="load_vector_documents">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_vector_documents">[docs]</a>
<span class="k">def</span> <span class="nf">load_vector_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">D</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="n">labelled</span> <span class="o">=</span> <span class="n">D</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">301</span>
<span class="k">if</span> <span class="n">labelled</span><span class="p">:</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
<div class="viewcode-block" id="SamplesFromDir">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.SamplesFromDir">[docs]</a>
<span class="k">class</span> <span class="nc">SamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">ground_truth_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">load_fn</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_fn</span>
<span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">ground_truth_path</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">for</span> <span class="nb">id</span><span class="p">,</span> <span class="n">prevalence</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
<span class="n">sample</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">.txt&#39;</span><span class="p">))</span>
<span class="k">yield</span> <span class="n">sample</span><span class="p">,</span> <span class="n">prevalence</span></div>
<div class="viewcode-block" id="ResultSubmission">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission">[docs]</a>
<span class="k">class</span> <span class="nc">ResultSubmission</span><span class="p">:</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">def</span> <span class="nf">__init_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">categories</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">categories</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">or</span> <span class="n">categories</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">&#39;wrong format for categories: an int (&gt;=2) was expected&#39;</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">categories</span><span class="p">)))</span>
<span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">set_names</span><span class="p">(</span><span class="s1">&#39;id&#39;</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">n_categories</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
<div class="viewcode-block" id="ResultSubmission.add">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.add">[docs]</a>
<span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">prevalence_values</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_id</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: expected int for sample_sample, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sample_id</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: expected np.ndarray for prevalence_values, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__init_df</span><span class="p">(</span><span class="n">categories</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">))</span>
<span class="k">if</span> <span class="n">sample_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: prevalence values for &quot;</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">&quot; already added&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">size</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_categories</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: wrong shape found for prevalence vector </span><span class="si">{</span><span class="n">prevalence_values</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: prevalence values out of range [0,1] for &quot;</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">&quot;&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevalence_values</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">ERROR_TOL</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: prevalence values do not sum up to one for &quot;</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">&quot;&#39;</span>
<span class="sa">f</span><span class="s1">&#39;(error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">prevalence_values</span></div>
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
<div class="viewcode-block" id="ResultSubmission.load">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.load">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s1">&#39;ResultSubmission&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_file_format</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="n">r</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="p">()</span>
<span class="n">r</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
<span class="k">return</span> <span class="n">r</span></div>
<div class="viewcode-block" id="ResultSubmission.dump">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.dump">[docs]</a>
<span class="k">def</span> <span class="nf">dump</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="ResultSubmission.prevalence">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.prevalence">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">sel</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span>
<span class="k">if</span> <span class="n">sel</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">sel</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span></div>
<div class="viewcode-block" id="ResultSubmission.iterrows">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.iterrows">[docs]</a>
<span class="k">def</span> <span class="nf">iterrows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">row</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">yield</span> <span class="n">index</span><span class="p">,</span> <span class="n">prevalence</span></div>
<div class="viewcode-block" id="ResultSubmission.check_file_format">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_file_format">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">check_file_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;the file </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1"> does not seem to be a valid csv file. &#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="ResultSubmission.check_dataframe_format">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_dataframe_format">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">check_dataframe_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
<span class="n">hint_path</span> <span class="o">=</span> <span class="s1">&#39;&#39;</span> <span class="c1"># if given, show the data path in the error message</span>
<span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">hint_path</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39; in </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span> <span class="o">!=</span> <span class="s1">&#39;id&#39;</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;the format of the header should be &quot;id,0,...,n-1&quot;, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;where n is the number of categories&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">ci</span><span class="p">)</span> <span class="k">for</span> <span class="n">ci</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">]</span> <span class="o">!=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">))):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, category ids should be 0,1,2,...,n-1, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;where n is the number of categories&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: results file is empty&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">DEV_SAMPLES</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">TEST_SAMPLES</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;wrong number of prevalence values found</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">; &#39;</span>
<span class="sa">f</span><span class="s1">&#39;expected </span><span class="si">{</span><span class="n">DEV_SAMPLES</span><span class="si">}</span><span class="s1"> for development sets and &#39;</span>
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">TEST_SAMPLES</span><span class="si">}</span><span class="s1"> for test sets; found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
<span class="n">expected_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)))</span>
<span class="k">if</span> <span class="n">ids</span> <span class="o">!=</span> <span class="n">expected_ids</span><span class="p">:</span>
<span class="n">missing</span> <span class="o">=</span> <span class="n">expected_ids</span> <span class="o">-</span> <span class="n">ids</span>
<span class="k">if</span> <span class="n">missing</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> missing ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">unexpected</span> <span class="o">=</span> <span class="n">ids</span> <span class="o">-</span> <span class="n">expected_ids</span>
<span class="k">if</span> <span class="n">unexpected</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> unexpected ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">unexpected</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">category_id</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
<span class="k">if</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1"> column &quot;</span><span class="si">{</span><span class="n">category_id</span><span class="si">}</span><span class="s1">&quot; contains values out of range [0,1]&#39;</span><span class="p">)</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">values</span>
<span class="n">round_errors</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">ERROR_TOL</span>
<span class="k">if</span> <span class="n">round_errors</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;warning: prevalence values in rows with id </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">round_errors</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;do not sum up to 1 (error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">), &#39;</span>
<span class="sa">f</span><span class="s1">&#39;probably due to some rounding errors.&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">df</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,728 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data.base &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data.base</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data.base</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">cached_property</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Iterable</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">issparse</span>
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">vstack</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span><span class="p">,</span> <span class="n">RepeatedStratifiedKFold</span>
<span class="kn">from</span> <span class="nn">numpy.random</span> <span class="kn">import</span> <span class="n">RandomState</span>
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">temp_seed</span>
<div class="viewcode-block" id="LabelledCollection">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection">[docs]</a>
<span class="k">class</span> <span class="nc">LabelledCollection</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A LabelledCollection is a set of objects each with a label attached to each of them. </span>
<span class="sd"> This class implements several sampling routines and other utilities.</span>
<span class="sd"> </span>
<span class="sd"> :param instances: array-like (np.ndarray, list, or csr_matrix are supported)</span>
<span class="sd"> :param labels: array-like with the same length of instances</span>
<span class="sd"> :param classes: optional, list of classes from which labels are taken. If not specified, the classes are inferred</span>
<span class="sd"> from the labels. The classes must be indicated in cases in which some of the labels might have no examples</span>
<span class="sd"> (i.e., a prevalence of 0)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">instances</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span>
<span class="c1"># lists of strings occupy too much as ndarrays (although python-objects add a heavy overload)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span>
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="n">classes</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">classes</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">difference</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)))</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;labels (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="si">}</span><span class="s1">) contain values not included in classes_ (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n_docs</span><span class="p">)[</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
<div class="viewcode-block" id="LabelledCollection.load">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.load">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a labelled set of data and convert it into a :class:`LabelledCollection` instance. The function in charge</span>
<span class="sd"> of reading the instances must be specified. This function can be a custom one, or any of the reading functions</span>
<span class="sd"> defined in :mod:`quapy.data.reader` module.</span>
<span class="sd"> :param path: string, the path to the file containing the labelled instances</span>
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
<span class="sd"> labels</span>
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances, i.e.,</span>
<span class="sd"> these arguments are used to call `loader_func(path, **loader_kwargs)`</span>
<span class="sd"> :return: a :class:`LabelledCollection` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="o">*</span><span class="n">loader_func</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">),</span> <span class="n">classes</span><span class="p">)</span></div>
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the length of this collection (number of labelled instances)</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<div class="viewcode-block" id="LabelledCollection.prevalence">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.prevalence">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the prevalence, or relative frequency, of the classes in the codeframe.</span>
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the relative frequencies of each class, in the same order</span>
<span class="sd"> as listed by `self.classes_`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts</span><span class="p">()</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span></div>
<div class="viewcode-block" id="LabelledCollection.counts">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.counts">[docs]</a>
<span class="k">def</span> <span class="nf">counts</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of instances for each of the classes in the codeframe.</span>
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the number of instances of each class, in the same order</span>
<span class="sd"> as listed by `self.classes_`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">])</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The number of classes</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns True if the number of classes is 2</span>
<span class="sd"> :return: boolean</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span>
<div class="viewcode-block" id="LabelledCollection.sampling_index">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_index">[docs]</a>
<span class="k">def</span> <span class="nf">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the</span>
<span class="sd"> prevalence values are not specified, then returns the index of a uniform sampling.</span>
<span class="sd"> For each class, the sampling is drawn with replacement if the requested prevalence is larger than</span>
<span class="sd"> the actual prevalence of the class, or without replacement otherwise.</span>
<span class="sd"> :param size: integer, the requested size</span>
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since</span>
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
<span class="sd"> `self.classes_` can be specified, while the other class takes prevalence value `1-p`</span>
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
<span class="sd"> :param random_state: seed for reproducing sampling</span>
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># no prevalence was indicated; returns an index for uniform sampling</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">),)</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="s1">&#39;unexpected number of prevalences&#39;</span>
<span class="k">assert</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;prevalences (</span><span class="si">{</span><span class="n">prevs</span><span class="si">}</span><span class="s1">) wrong range (sum=</span><span class="si">{</span><span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="si">}</span><span class="s1">)&#39;</span>
<span class="c1"># Decide how many instances should be taken for each class in order to satisfy the requested prevalence</span>
<span class="c1"># accurately, and the number of instances in the sample (exactly). If int(size * prevs[i]) (which is</span>
<span class="c1"># &lt;= size * prevs[i]) examples are drawn from class i, there could be a remainder number of instances to take</span>
<span class="c1"># to satisfy the size constrain. The remainder is distributed along the classes with probability = prevs.</span>
<span class="c1"># (This aims at avoiding the remainder to be placed in a class for which the prevalence requested is 0.)</span>
<span class="n">n_requests</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="nb">round</span><span class="p">(</span><span class="n">size</span> <span class="o">*</span> <span class="n">prevs</span><span class="p">[</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">class_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)}</span>
<span class="n">remainder</span> <span class="o">=</span> <span class="n">size</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">n_requests</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
<span class="k">with</span> <span class="n">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
<span class="c1"># due to rounding, the remainder can be 0, &gt;0, or &lt;0</span>
<span class="k">if</span> <span class="n">remainder</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># when the remainder is &gt;0 we randomly add 1 to the requests for each class;</span>
<span class="c1"># more prevalent classes are more likely to be taken in order to minimize the impact in the final prevalence</span>
<span class="k">for</span> <span class="n">rand_class</span> <span class="ow">in</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">remainder</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">):</span>
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">elif</span> <span class="n">remainder</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># when the remainder is &lt;0 we randomly remove 1 from the requests, unless the request is 0 for a chosen</span>
<span class="c1"># class; we repeat until remainder==0</span>
<span class="k">while</span> <span class="n">remainder</span><span class="o">!=</span><span class="mi">0</span><span class="p">:</span>
<span class="n">rand_class</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">)</span>
<span class="k">if</span> <span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">-=</span> <span class="mi">1</span>
<span class="n">remainder</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">class_</span><span class="p">,</span> <span class="n">n_requested</span> <span class="ow">in</span> <span class="n">n_requests</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="n">n_candidates</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span>
<span class="n">index_sample</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">][</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">n_candidates</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">n_requested</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="p">(</span><span class="n">n_requested</span> <span class="o">&gt;</span> <span class="n">n_candidates</span><span class="p">))</span>
<span class="p">]</span> <span class="k">if</span> <span class="n">n_requested</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="k">else</span> <span class="p">[]</span>
<span class="n">indexes_sample</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_sample</span><span class="p">)</span>
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
<span class="k">if</span> <span class="n">shuffle</span><span class="p">:</span>
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span>
<span class="k">return</span> <span class="n">indexes_sample</span></div>
<div class="viewcode-block" id="LabelledCollection.uniform_sampling_index">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling_index">[docs]</a>
<span class="k">def</span> <span class="nf">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn</span>
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
<span class="sd"> otherwise.</span>
<span class="sd"> :param size: integer, the size of the uniform sample</span>
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">ng</span> <span class="o">=</span> <span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">ng</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span>
<span class="k">return</span> <span class="n">ng</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="n">size</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="n">size</span> <span class="o">&gt;</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
<div class="viewcode-block" id="LabelledCollection.sampling">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling">[docs]</a>
<span class="k">def</span> <span class="nf">sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence</span>
<span class="sd"> values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than</span>
<span class="sd"> the actual prevalence of the class, or with replacement otherwise.</span>
<span class="sd"> :param size: integer, the requested size</span>
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since</span>
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
<span class="sd"> `self.classes_` can be specified, while the other class takes prevalence value `1-p`</span>
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
<span class="sd"> :param random_state: seed for reproducing sampling</span>
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size` and prevalence close to `prevs` (or</span>
<span class="sd"> prevalence == `prevs` if the exact prevalence values can be met as proportions of instances)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">prev_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">prev_index</span><span class="p">)</span></div>
<div class="viewcode-block" id="LabelledCollection.uniform_sampling">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling">[docs]</a>
<span class="k">def</span> <span class="nf">uniform_sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn</span>
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
<span class="sd"> otherwise.</span>
<span class="sd"> :param size: integer, the requested size</span>
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">unif_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">unif_index</span><span class="p">)</span></div>
<div class="viewcode-block" id="LabelledCollection.sampling_from_index">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_from_index">[docs]</a>
<span class="k">def</span> <span class="nf">sampling_from_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns an instance of :class:`LabelledCollection` whose elements are sampled from this collection using the</span>
<span class="sd"> index.</span>
<span class="sd"> :param index: np.ndarray</span>
<span class="sd"> :return: an instance of :class:`LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">documents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="n">labels</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span></div>
<div class="viewcode-block" id="LabelledCollection.split_stratified">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_stratified">[docs]</a>
<span class="k">def</span> <span class="nf">split_stratified</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns two instances of :class:`LabelledCollection` split with stratification from this collection, at desired</span>
<span class="sd"> proportion.</span>
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
<span class="sd"> (typically used as a test collection).</span>
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
<span class="sd"> second one with `1-train_prop` elements</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">tr_docs</span><span class="p">,</span> <span class="n">te_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">te_labels</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="n">train_prop</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
<span class="p">)</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">tr_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">te_docs</span><span class="p">,</span> <span class="n">te_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
<div class="viewcode-block" id="LabelledCollection.split_random">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_random">[docs]</a>
<span class="k">def</span> <span class="nf">split_random</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired</span>
<span class="sd"> proportion.</span>
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
<span class="sd"> (typically used as a test collection).</span>
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
<span class="sd"> second one with `1-train_prop` elements</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">train_prop</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> \
<span class="s1">&#39;argument train_prop cannot be greater than the number of elements in the collection&#39;</span>
<span class="n">splitpoint</span> <span class="o">=</span> <span class="n">train_prop</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">assert</span> <span class="mi">0</span> <span class="o">&lt;</span> <span class="n">train_prop</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">,</span> \
<span class="s1">&#39;argument train_prop out of range (0,1)&#39;</span>
<span class="n">splitpoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">*</span><span class="n">train_prop</span><span class="p">))</span>
<span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">indexes</span><span class="p">[:</span><span class="n">splitpoint</span><span class="p">],</span> <span class="n">indexes</span><span class="p">[</span><span class="n">splitpoint</span><span class="p">:]</span>
<span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">left</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">right</span><span class="p">)</span>
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
<span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of this collection with another collection.</span>
<span class="sd"> Both labelled collections must have the same classes.</span>
<span class="sd"> :param other: another :class:`LabelledCollection`</span>
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of both collections</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">==</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unsupported operation for collections on different classes; &#39;</span>
<span class="sa">f</span><span class="s1">&#39;expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span>
<div class="viewcode-block" id="LabelledCollection.join">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.join">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="s1">&#39;LabelledCollection&#39;</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of the collections given in input.</span>
<span class="sd"> :param args: instances of :class:`LabelledCollection`</span>
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of both collections</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">args</span> <span class="o">=</span> <span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="n">lc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">]</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;empty list is not allowed for mix&#39;</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">lc</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">)</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]),</span> \
<span class="s1">&#39;only instances of LabelledCollection allowed&#39;</span>
<span class="n">first_instances</span> <span class="o">=</span> <span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">instances</span>
<span class="n">first_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">type</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span><span class="o">==</span><span class="n">first_type</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
<span class="s1">&#39;not all the collections are of instances of the same type&#39;</span>
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="n">first_ndim</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">ndim</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="n">first_ndim</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
<span class="s1">&#39;not all the ndarrays are of the same dimension&#39;</span>
<span class="k">if</span> <span class="n">first_ndim</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">first_shape</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="o">==</span> <span class="n">first_shape</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
<span class="s1">&#39;not all the ndarrays are of the same shape&#39;</span>
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">):</span>
<span class="n">instances</span> <span class="o">=</span> <span class="n">vstack</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="n">instances</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">&#39;unsupported operation for collection types&#39;</span><span class="p">)</span>
<span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">labels</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">Xy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the instances and labels. This is useful when working with `sklearn` estimators, e.g.:</span>
<span class="sd"> &gt;&gt;&gt; svm = LinearSVC().fit(*my_collection.Xy)</span>
<span class="sd"> :return: a tuple `(instances, labels)` from this collection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">Xp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from</span>
<span class="sd"> a :class:`LabelledCollection` object.</span>
<span class="sd"> :return: a tuple `(instances, prevalence)` from this collection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">X</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An alias to self.instances</span>
<span class="sd"> :return: self.instances</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">y</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An alias to self.labels</span>
<span class="sd"> :return: self.labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">p</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An alias to self.prevalence()</span>
<span class="sd"> :return: self.prevalence()</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<div class="viewcode-block" id="LabelledCollection.stats">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.stats">[docs]</a>
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns (and eventually prints) a dictionary with some stats of this collection. E.g.,:</span>
<span class="sd"> &gt;&gt;&gt; data = qp.datasets.fetch_reviews(&#39;kindle&#39;, tfidf=True, min_df=5)</span>
<span class="sd"> &gt;&gt;&gt; data.training.stats()</span>
<span class="sd"> &gt;&gt;&gt; #instances=3821, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
<span class="sd"> :return: a dictionary containing some stats of this collection. Keys include `#instances` (the number of</span>
<span class="sd"> instances), `type` (the type representing the instances), `#features` (the number of features, if the</span>
<span class="sd"> instances are in array-like format), `#classes` (the classes of the collection), `prevs` (the prevalence</span>
<span class="sd"> values for each class)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ninstances</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="n">instance_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
<span class="n">nfeats</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">elif</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span> <span class="ow">or</span> <span class="n">issparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">):</span>
<span class="n">nfeats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">nfeats</span> <span class="o">=</span> <span class="s1">&#39;?&#39;</span>
<span class="n">stats_</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;instances&#39;</span><span class="p">:</span> <span class="n">ninstances</span><span class="p">,</span>
<span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="n">instance_type</span><span class="p">,</span>
<span class="s1">&#39;features&#39;</span><span class="p">:</span> <span class="n">nfeats</span><span class="p">,</span>
<span class="s1">&#39;classes&#39;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span>
<span class="s1">&#39;prevs&#39;</span><span class="p">:</span> <span class="n">strprev</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())}</span>
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;#instances=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">&quot;instances&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, type=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">&quot;features&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;#classes=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">&quot;classes&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, prevs=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">&quot;prevs&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">stats_</span></div>
<div class="viewcode-block" id="LabelledCollection.kFCV">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.kFCV">[docs]</a>
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation.</span>
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
<span class="sd"> :param random_state: integer (default 0), guarantees that the folds generated are reproducible</span>
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">kf</span> <span class="o">=</span> <span class="n">RepeatedStratifiedKFold</span><span class="p">(</span><span class="n">n_splits</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">for</span> <span class="n">train_index</span><span class="p">,</span> <span class="n">test_index</span> <span class="ow">in</span> <span class="n">kf</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">Xy</span><span class="p">):</span>
<span class="n">train</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">train_index</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">test_index</span><span class="p">)</span>
<span class="k">yield</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span></div>
</div>
<div class="viewcode-block" id="Dataset">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset">[docs]</a>
<span class="k">class</span> <span class="nc">Dataset</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstraction of training and test :class:`LabelledCollection` objects.</span>
<span class="sd"> :param training: a :class:`LabelledCollection` instance</span>
<span class="sd"> :param test: a :class:`LabelledCollection` instance</span>
<span class="sd"> :param vocabulary: if indicated, is a dictionary of the terms used in this textual dataset</span>
<span class="sd"> :param name: a string representing the name of the dataset</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">vocabulary</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">set</span><span class="p">(</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span> <span class="o">==</span> <span class="nb">set</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">&#39;incompatible labels in training and test collections&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">training</span>
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">test</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vocabulary</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
<div class="viewcode-block" id="Dataset.SplitStratified">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.SplitStratified">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">SplitStratified</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">collection</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="mf">0.6</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates a :class:`Dataset` from a stratified split of a :class:`LabelledCollection` instance.</span>
<span class="sd"> See :meth:`LabelledCollection.split_stratified`</span>
<span class="sd"> :param collection: :class:`LabelledCollection`</span>
<span class="sd"> :param train_size: the proportion of training documents (the rest conforms the test split)</span>
<span class="sd"> :return: an instance of :class:`Dataset`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">collection</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="n">train_size</span><span class="p">))</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The classes according to which the training collection is labelled</span>
<span class="sd"> :return: The classes according to which the training collection is labelled</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The number of classes according to which the training collection is labelled</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">n_classes</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns True if the training collection is labelled according to two classes</span>
<span class="sd"> :return: boolean</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">binary</span>
<div class="viewcode-block" id="Dataset.load">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.load">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a training and a test labelled set of data and convert it into a :class:`Dataset` instance.</span>
<span class="sd"> The function in charge of reading the instances must be specified. This function can be a custom one, or any of</span>
<span class="sd"> the reading functions defined in :mod:`quapy.data.reader` module.</span>
<span class="sd"> :param train_path: string, the path to the file containing the training instances</span>
<span class="sd"> :param test_path: string, the path to the file containing the test instances</span>
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
<span class="sd"> labels</span>
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances.</span>
<span class="sd"> See :meth:`LabelledCollection.load` for further details.</span>
<span class="sd"> :return: a :class:`Dataset` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">train_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">train_test</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Alias to `self.training` and `self.test`</span>
<span class="sd"> :return: the training and test collections</span>
<span class="sd"> :return: the training and test collections</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span>
<div class="viewcode-block" id="Dataset.stats">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.stats">[docs]</a>
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:</span>
<span class="sd"> &gt;&gt;&gt; data = qp.datasets.fetch_reviews(&#39;kindle&#39;, tfidf=True, min_df=5)</span>
<span class="sd"> &gt;&gt;&gt; data.stats()</span>
<span class="sd"> &gt;&gt;&gt; Dataset=kindle #tr-instances=3821, #te-instances=21591, type=&lt;class &#39;scipy.sparse.csr.csr_matrix&#39;&gt;, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
<span class="sd"> :return: a dictionary containing some stats of this collection for the training and test collections. The keys</span>
<span class="sd"> are `train` and `test`, and point to dedicated dictionaries of stats, for each collection, with keys</span>
<span class="sd"> `#instances` (the number of instances), `type` (the type representing the instances),</span>
<span class="sd"> `#features` (the number of features, if the instances are in array-like format), `#classes` (the classes of</span>
<span class="sd"> the collection), `prevs` (the prevalence values for each class)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">tr_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">te_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Dataset=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s1"> #tr-instances=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">&quot;instances&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, #te-instances=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">&quot;instances&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;type=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">&quot;features&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, #classes=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">&quot;classes&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;tr-prevs=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">&quot;prevs&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">, te-prevs=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">&quot;prevs&quot;</span><span class="p">]</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="p">{</span><span class="s1">&#39;train&#39;</span><span class="p">:</span> <span class="n">tr_stats</span><span class="p">,</span> <span class="s1">&#39;test&#39;</span><span class="p">:</span> <span class="n">te_stats</span><span class="p">}</span></div>
<div class="viewcode-block" id="Dataset.kFCV">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.kFCV">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around</span>
<span class="sd"> :meth:`LabelledCollection.kFCV` that returns :class:`Dataset` instances made of training and test folds.</span>
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
<span class="sd"> :param random_state: integer (default 0), guarantees that the folds generated are reproducible</span>
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation as instances of :class:`Dataset`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)):</span>
<span class="k">yield</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;fold </span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">nfolds</span><span class="si">}</span><span class="s1"> (round=</span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">//</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="Dataset.reduce">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.reduce">[docs]</a>
<span class="k">def</span> <span class="nf">reduce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_train</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_test</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.</span>
<span class="sd"> :param n_train: number of training documents to keep (default 100)</span>
<span class="sd"> :param n_test: number of test documents to keep (default 100)</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_train</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_test</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
<span class="k">return</span> <span class="bp">self</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,945 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data.datasets &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data.datasets</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data.datasets</h1><div class="highlight"><pre>
<div class="viewcode-block" id="warn">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.warn">[docs]</a>
<span></span><span class="k">def</span> <span class="nf">warn</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="k">pass</span></div>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span> <span class="o">=</span> <span class="n">warn</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">zipfile</span>
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">from</span> <span class="nn">ucimlrepo</span> <span class="kn">import</span> <span class="n">fetch_ucirepo</span>
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.data.preprocessing</span> <span class="kn">import</span> <span class="n">text2tfidf</span><span class="p">,</span> <span class="n">reduce_columns</span>
<span class="kn">from</span> <span class="nn">quapy.data.reader</span> <span class="kn">import</span> <span class="o">*</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">download_file_if_not_exists</span><span class="p">,</span> <span class="n">download_file</span><span class="p">,</span> <span class="n">get_quapy_home</span><span class="p">,</span> <span class="n">pickled_resource</span>
<span class="n">REVIEWS_SENTIMENT_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;hp&#39;</span><span class="p">,</span> <span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="s1">&#39;imdb&#39;</span><span class="p">]</span>
<span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;gasp&#39;</span><span class="p">,</span> <span class="s1">&#39;hcr&#39;</span><span class="p">,</span> <span class="s1">&#39;omd&#39;</span><span class="p">,</span> <span class="s1">&#39;sanders&#39;</span><span class="p">,</span>
<span class="s1">&#39;semeval13&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval14&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval15&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval16&#39;</span><span class="p">,</span>
<span class="s1">&#39;sst&#39;</span><span class="p">,</span> <span class="s1">&#39;wa&#39;</span><span class="p">,</span> <span class="s1">&#39;wb&#39;</span><span class="p">]</span>
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;gasp&#39;</span><span class="p">,</span> <span class="s1">&#39;hcr&#39;</span><span class="p">,</span> <span class="s1">&#39;omd&#39;</span><span class="p">,</span> <span class="s1">&#39;sanders&#39;</span><span class="p">,</span>
<span class="s1">&#39;semeval&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval16&#39;</span><span class="p">,</span>
<span class="s1">&#39;sst&#39;</span><span class="p">,</span> <span class="s1">&#39;wa&#39;</span><span class="p">,</span> <span class="s1">&#39;wb&#39;</span><span class="p">]</span>
<span class="n">UCI_BINARY_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;acute.a&#39;</span><span class="p">,</span> <span class="s1">&#39;acute.b&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.1&#39;</span><span class="p">,</span> <span class="s1">&#39;balance.2&#39;</span><span class="p">,</span> <span class="s1">&#39;balance.3&#39;</span><span class="p">,</span>
<span class="s1">&#39;breast-cancer&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.1&#39;</span><span class="p">,</span> <span class="s1">&#39;cmc.2&#39;</span><span class="p">,</span> <span class="s1">&#39;cmc.3&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.1&#39;</span><span class="p">,</span> <span class="s1">&#39;ctg.2&#39;</span><span class="p">,</span> <span class="s1">&#39;ctg.3&#39;</span><span class="p">,</span>
<span class="c1">#&#39;diabetes&#39;, # &lt;-- I haven&#39;t found this one...</span>
<span class="s1">&#39;german&#39;</span><span class="p">,</span>
<span class="s1">&#39;haberman&#39;</span><span class="p">,</span>
<span class="s1">&#39;ionosphere&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.1&#39;</span><span class="p">,</span> <span class="s1">&#39;iris.2&#39;</span><span class="p">,</span> <span class="s1">&#39;iris.3&#39;</span><span class="p">,</span>
<span class="s1">&#39;mammographic&#39;</span><span class="p">,</span>
<span class="s1">&#39;pageblocks.5&#39;</span><span class="p">,</span>
<span class="c1">#&#39;phoneme&#39;, # &lt;-- I haven&#39;t found this one...</span>
<span class="s1">&#39;semeion&#39;</span><span class="p">,</span>
<span class="s1">&#39;sonar&#39;</span><span class="p">,</span>
<span class="s1">&#39;spambase&#39;</span><span class="p">,</span>
<span class="s1">&#39;spectf&#39;</span><span class="p">,</span>
<span class="s1">&#39;tictactoe&#39;</span><span class="p">,</span>
<span class="s1">&#39;transfusion&#39;</span><span class="p">,</span>
<span class="s1">&#39;wdbc&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.1&#39;</span><span class="p">,</span> <span class="s1">&#39;wine.2&#39;</span><span class="p">,</span> <span class="s1">&#39;wine.3&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-q-red&#39;</span><span class="p">,</span> <span class="s1">&#39;wine-q-white&#39;</span><span class="p">,</span>
<span class="s1">&#39;yeast&#39;</span><span class="p">]</span>
<span class="n">UCI_MULTICLASS_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;dry-bean&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-quality&#39;</span><span class="p">,</span>
<span class="s1">&#39;academic-success&#39;</span><span class="p">,</span>
<span class="s1">&#39;digits&#39;</span><span class="p">,</span>
<span class="s1">&#39;letter&#39;</span><span class="p">]</span>
<span class="n">LEQUA2022_TASKS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;T1A&#39;</span><span class="p">,</span> <span class="s1">&#39;T1B&#39;</span><span class="p">,</span> <span class="s1">&#39;T2A&#39;</span><span class="p">,</span> <span class="s1">&#39;T2B&#39;</span><span class="p">]</span>
<span class="n">_TXA_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">250</span>
<span class="n">_TXB_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
<span class="n">LEQUA2022_SAMPLE_SIZE</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;TXA&#39;</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;TXB&#39;</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;T1A&#39;</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;T1B&#39;</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;T2A&#39;</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;T2B&#39;</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;binary&#39;</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
<span class="s1">&#39;multiclass&#39;</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span>
<span class="p">}</span>
<div class="viewcode-block" id="fetch_reviews">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_reviews">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dataset</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a Reviews dataset as a Dataset instance, as used in</span>
<span class="sd"> `Esuli, A., Moreo, A., and Sebastiani, F. &quot;A recurrent neural network for sentiment quantification.&quot;</span>
<span class="sd"> Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018. &lt;https://dl.acm.org/doi/abs/10.1145/3269206.3269287&gt;`_.</span>
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS`</span>
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are &#39;hp&#39;, &#39;kindle&#39;, &#39;imdb&#39;</span>
<span class="sd"> :param tfidf: set to True to transform the raw documents into tfidf weighted matrices</span>
<span class="sd"> :param min_df: minimun number of documents that should contain a term in order for the term to be</span>
<span class="sd"> kept (ignored if tfidf==False)</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
<span class="sd"> faster subsequent invokations</span>
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment reviews. &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;Valid ones are </span><span class="si">{</span><span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">URL_TRAIN</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_train.txt&#39;</span>
<span class="n">URL_TEST</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_test.txt&#39;</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;reviews&#39;</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">train_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;reviews&#39;</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">&#39;train.txt&#39;</span><span class="p">)</span>
<span class="n">test_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;reviews&#39;</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">&#39;test.txt&#39;</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TRAIN</span><span class="p">,</span> <span class="n">train_path</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TEST</span><span class="p">,</span> <span class="n">test_path</span><span class="p">)</span>
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;reviews&#39;</span><span class="p">,</span> <span class="s1">&#39;pickle&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">.pkl&#39;</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">from_text</span><span class="p">)</span>
<span class="k">if</span> <span class="n">tfidf</span><span class="p">:</span>
<span class="n">text2tfidf</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
<span class="k">return</span> <span class="n">data</span></div>
<div class="viewcode-block" id="fetch_twitter">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_twitter">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dataset</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a Twitter dataset as a :class:`quapy.data.base.Dataset` instance, as used in:</span>
<span class="sd"> `Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.</span>
<span class="sd"> Social Network Analysis and Mining6(19), 122 (2016) &lt;https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf&gt;`_</span>
<span class="sd"> Note that the datasets &#39;semeval13&#39;, &#39;semeval14&#39;, &#39;semeval15&#39; share the same training set.</span>
<span class="sd"> The list of valid dataset names corresponding to training sets can be accessed in</span>
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN`, while the test sets can be accessed in</span>
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST`</span>
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are &#39;gasp&#39;, &#39;hcr&#39;, &#39;omd&#39;, &#39;sanders&#39;, &#39;semeval13&#39;,</span>
<span class="sd"> &#39;semeval14&#39;, &#39;semeval15&#39;, &#39;semeval16&#39;, &#39;sst&#39;, &#39;wa&#39;, &#39;wb&#39;</span>
<span class="sd"> :param for_model_selection: if True, then returns the train split as the training set and the devel split</span>
<span class="sd"> as the test set; if False, then returns the train+devel split as the training set and the test set as the</span>
<span class="sd"> test set</span>
<span class="sd"> :param min_df: minimun number of documents that should contain a term in order for the term to be kept</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
<span class="sd"> faster subsequent invokations</span>
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment twitter. &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;Valid ones are </span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="si">}</span><span class="s1"> for model selection and &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="si">}</span><span class="s1"> for test (datasets &quot;semeval14&quot;, &quot;semeval15&quot;, &quot;semeval16&quot; share &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;a common training set &quot;semeval&quot;)&#39;</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">URL</span> <span class="o">=</span> <span class="s1">&#39;https://zenodo.org/record/4255764/files/tweet_sentiment_quantification_snam.zip&#39;</span>
<span class="n">unzipped_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;tweet_sentiment_quantification_snam&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">):</span>
<span class="n">downloaded_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;tweet_sentiment_quantification_snam.zip&#39;</span><span class="p">)</span>
<span class="n">download_file</span><span class="p">(</span><span class="n">URL</span><span class="p">,</span> <span class="n">downloaded_path</span><span class="p">)</span>
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">data_home</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="p">{</span><span class="s1">&#39;semeval13&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval14&#39;</span><span class="p">,</span> <span class="s1">&#39;semeval15&#39;</span><span class="p">}:</span>
<span class="n">trainset_name</span> <span class="o">=</span> <span class="s1">&#39;semeval&#39;</span>
<span class="n">testset_name</span> <span class="o">=</span> <span class="s1">&#39;semeval&#39;</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="n">dataset_name</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;the training and development sets for datasets &#39;semeval13&#39;, &#39;semeval14&#39;, &#39;semeval15&#39; are common &quot;</span>
<span class="sa">f</span><span class="s2">&quot;(called &#39;semeval&#39;); returning trainin-set=&#39;</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s2">&#39; and test-set=</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;semeval&#39;</span> <span class="ow">and</span> <span class="n">for_model_selection</span><span class="o">==</span><span class="kc">False</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;dataset &quot;semeval&quot; can only be used for model selection. &#39;</span>
<span class="s1">&#39;Use &quot;semeval13&quot;, &quot;semeval14&quot;, or &quot;semeval15&quot; for model evaluation.&#39;</span><span class="p">)</span>
<span class="n">trainset_name</span> <span class="o">=</span> <span class="n">testset_name</span> <span class="o">=</span> <span class="n">dataset_name</span>
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;train&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train.feature.txt&#39;</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;test&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev.feature.txt&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;train&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train+dev.feature.txt&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;semeval16&#39;</span><span class="p">:</span> <span class="c1"># there is a different test name in the case of semeval16 only</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;test&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev-test.feature.txt&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;test&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.test.feature.txt&#39;</span><span class="p">)</span>
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
<span class="n">mode</span> <span class="o">=</span> <span class="s2">&quot;train-dev&quot;</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="s2">&quot;train+dev-test&quot;</span>
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">&#39;pickle&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">mode</span><span class="si">}</span><span class="s1">.pkl&#39;</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">from_sparse</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
<span class="k">return</span> <span class="n">data</span></div>
<div class="viewcode-block" id="fetch_UCIBinaryDataset">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryDataset">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dataset</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a UCI dataset as an instance of :class:`quapy.data.base.Dataset`, as used in</span>
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).</span>
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
<span class="sd"> Information Fusion, 34, 87-100. &lt;https://www.sciencedirect.com/science/article/pii/S1566253516300628&gt;`_</span>
<span class="sd"> and</span>
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).</span>
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
<span class="sd"> Information Fusion, 45, 1-15. &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> The datasets do not come with a predefined train-test split (see :meth:`fetch_UCILabelledCollection` for further</span>
<span class="sd"> information on how to use these collections), and so a train-test split is generated at desired proportion.</span>
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`</span>
<span class="sd"> :param dataset_name: a dataset name</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
<div class="viewcode-block" id="fetch_UCIBinaryLabelledCollection">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryLabelledCollection">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LabelledCollection</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in</span>
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., &amp; del Coz, J. J. (2017).</span>
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
<span class="sd"> Information Fusion, 34, 87-100. &lt;https://www.sciencedirect.com/science/article/pii/S1566253516300628&gt;`_</span>
<span class="sd"> and</span>
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., &amp; del Coz, J. J. (2019).</span>
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
<span class="sd"> Information Fusion, 45, 1-15. &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation</span>
<span class="sd"> protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.</span>
<span class="sd"> This can be reproduced by using :meth:`quapy.data.base.Dataset.kFCV`, e.g.:</span>
<span class="sd"> &gt;&gt;&gt; import quapy as qp</span>
<span class="sd"> &gt;&gt;&gt; collection = qp.datasets.fetch_UCIBinaryLabelledCollection(&quot;yeast&quot;)</span>
<span class="sd"> &gt;&gt;&gt; for data in qp.train.Dataset.kFCV(collection, nfolds=5, nrepeats=2):</span>
<span class="sd"> &gt;&gt;&gt; ...</span>
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`</span>
<span class="sd"> :param dataset_name: a dataset name</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the UCI Machine Learning datasets repository. &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;Valid ones are </span><span class="si">{</span><span class="n">UCI_BINARY_DATASETS</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">dataset_fullname</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;acute.a&#39;</span><span class="p">:</span> <span class="s1">&#39;Acute Inflammations (urinary bladder)&#39;</span><span class="p">,</span>
<span class="s1">&#39;acute.b&#39;</span><span class="p">:</span> <span class="s1">&#39;Acute Inflammations (renal pelvis)&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.1&#39;</span><span class="p">:</span> <span class="s1">&#39;Balance Scale Weight &amp; Distance Database (left)&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.2&#39;</span><span class="p">:</span> <span class="s1">&#39;Balance Scale Weight &amp; Distance Database (balanced)&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.3&#39;</span><span class="p">:</span> <span class="s1">&#39;Balance Scale Weight &amp; Distance Database (right)&#39;</span><span class="p">,</span>
<span class="s1">&#39;breast-cancer&#39;</span><span class="p">:</span> <span class="s1">&#39;Breast Cancer Wisconsin (Original)&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.1&#39;</span><span class="p">:</span> <span class="s1">&#39;Contraceptive Method Choice (no use)&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.2&#39;</span><span class="p">:</span> <span class="s1">&#39;Contraceptive Method Choice (long term)&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.3&#39;</span><span class="p">:</span> <span class="s1">&#39;Contraceptive Method Choice (short term)&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.1&#39;</span><span class="p">:</span> <span class="s1">&#39;Cardiotocography Data Set (normal)&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.2&#39;</span><span class="p">:</span> <span class="s1">&#39;Cardiotocography Data Set (suspect)&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.3&#39;</span><span class="p">:</span> <span class="s1">&#39;Cardiotocography Data Set (pathologic)&#39;</span><span class="p">,</span>
<span class="s1">&#39;german&#39;</span><span class="p">:</span> <span class="s1">&#39;Statlog German Credit Data&#39;</span><span class="p">,</span>
<span class="s1">&#39;haberman&#39;</span><span class="p">:</span> <span class="s2">&quot;Haberman&#39;s Survival Data&quot;</span><span class="p">,</span>
<span class="s1">&#39;ionosphere&#39;</span><span class="p">:</span> <span class="s1">&#39;Johns Hopkins University Ionosphere DB&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.1&#39;</span><span class="p">:</span> <span class="s1">&#39;Iris Plants Database(x)&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.2&#39;</span><span class="p">:</span> <span class="s1">&#39;Iris Plants Database(versicolour)&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.3&#39;</span><span class="p">:</span> <span class="s1">&#39;Iris Plants Database(virginica)&#39;</span><span class="p">,</span>
<span class="s1">&#39;mammographic&#39;</span><span class="p">:</span> <span class="s1">&#39;Mammographic Mass&#39;</span><span class="p">,</span>
<span class="s1">&#39;pageblocks.5&#39;</span><span class="p">:</span> <span class="s1">&#39;Page Blocks Classification (5)&#39;</span><span class="p">,</span>
<span class="s1">&#39;semeion&#39;</span><span class="p">:</span> <span class="s1">&#39;Semeion Handwritten Digit (8)&#39;</span><span class="p">,</span>
<span class="s1">&#39;sonar&#39;</span><span class="p">:</span> <span class="s1">&#39;Sonar, Mines vs. Rocks&#39;</span><span class="p">,</span>
<span class="s1">&#39;spambase&#39;</span><span class="p">:</span> <span class="s1">&#39;Spambase Data Set&#39;</span><span class="p">,</span>
<span class="s1">&#39;spectf&#39;</span><span class="p">:</span> <span class="s1">&#39;SPECTF Heart Data&#39;</span><span class="p">,</span>
<span class="s1">&#39;tictactoe&#39;</span><span class="p">:</span> <span class="s1">&#39;Tic-Tac-Toe Endgame Database&#39;</span><span class="p">,</span>
<span class="s1">&#39;transfusion&#39;</span><span class="p">:</span> <span class="s1">&#39;Blood Transfusion Service Center Data Set&#39;</span><span class="p">,</span>
<span class="s1">&#39;wdbc&#39;</span><span class="p">:</span> <span class="s1">&#39;Wisconsin Diagnostic Breast Cancer&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.1&#39;</span><span class="p">:</span> <span class="s1">&#39;Wine Recognition Data (1)&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.2&#39;</span><span class="p">:</span> <span class="s1">&#39;Wine Recognition Data (2)&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.3&#39;</span><span class="p">:</span> <span class="s1">&#39;Wine Recognition Data (3)&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-q-red&#39;</span><span class="p">:</span> <span class="s1">&#39;Wine Quality Red (6-10)&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-q-white&#39;</span><span class="p">:</span> <span class="s1">&#39;Wine Quality White (6-10)&#39;</span><span class="p">,</span>
<span class="s1">&#39;yeast&#39;</span><span class="p">:</span> <span class="s1">&#39;Yeast&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="c1"># the identifier is an alias for the dataset group, it&#39;s part of the url data-folder, and is the name we use</span>
<span class="c1"># to download the raw dataset</span>
<span class="n">identifier_map</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;acute.a&#39;</span><span class="p">:</span> <span class="s1">&#39;acute&#39;</span><span class="p">,</span>
<span class="s1">&#39;acute.b&#39;</span><span class="p">:</span> <span class="s1">&#39;acute&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.1&#39;</span><span class="p">:</span> <span class="s1">&#39;balance-scale&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.2&#39;</span><span class="p">:</span> <span class="s1">&#39;balance-scale&#39;</span><span class="p">,</span>
<span class="s1">&#39;balance.3&#39;</span><span class="p">:</span> <span class="s1">&#39;balance-scale&#39;</span><span class="p">,</span>
<span class="s1">&#39;breast-cancer&#39;</span><span class="p">:</span> <span class="s1">&#39;breast-cancer-wisconsin&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.1&#39;</span><span class="p">:</span> <span class="s1">&#39;cmc&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.2&#39;</span><span class="p">:</span> <span class="s1">&#39;cmc&#39;</span><span class="p">,</span>
<span class="s1">&#39;cmc.3&#39;</span><span class="p">:</span> <span class="s1">&#39;cmc&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.1&#39;</span><span class="p">:</span> <span class="s1">&#39;00193&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.2&#39;</span><span class="p">:</span> <span class="s1">&#39;00193&#39;</span><span class="p">,</span>
<span class="s1">&#39;ctg.3&#39;</span><span class="p">:</span> <span class="s1">&#39;00193&#39;</span><span class="p">,</span>
<span class="s1">&#39;german&#39;</span><span class="p">:</span> <span class="s1">&#39;statlog/german&#39;</span><span class="p">,</span>
<span class="s1">&#39;haberman&#39;</span><span class="p">:</span> <span class="s1">&#39;haberman&#39;</span><span class="p">,</span>
<span class="s1">&#39;ionosphere&#39;</span><span class="p">:</span> <span class="s1">&#39;ionosphere&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.1&#39;</span><span class="p">:</span> <span class="s1">&#39;iris&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.2&#39;</span><span class="p">:</span> <span class="s1">&#39;iris&#39;</span><span class="p">,</span>
<span class="s1">&#39;iris.3&#39;</span><span class="p">:</span> <span class="s1">&#39;iris&#39;</span><span class="p">,</span>
<span class="s1">&#39;mammographic&#39;</span><span class="p">:</span> <span class="s1">&#39;mammographic-masses&#39;</span><span class="p">,</span>
<span class="s1">&#39;pageblocks.5&#39;</span><span class="p">:</span> <span class="s1">&#39;page-blocks&#39;</span><span class="p">,</span>
<span class="s1">&#39;semeion&#39;</span><span class="p">:</span> <span class="s1">&#39;semeion&#39;</span><span class="p">,</span>
<span class="s1">&#39;sonar&#39;</span><span class="p">:</span> <span class="s1">&#39;undocumented/connectionist-bench/sonar&#39;</span><span class="p">,</span>
<span class="s1">&#39;spambase&#39;</span><span class="p">:</span> <span class="s1">&#39;spambase&#39;</span><span class="p">,</span>
<span class="s1">&#39;spectf&#39;</span><span class="p">:</span> <span class="s1">&#39;spect&#39;</span><span class="p">,</span>
<span class="s1">&#39;tictactoe&#39;</span><span class="p">:</span> <span class="s1">&#39;tic-tac-toe&#39;</span><span class="p">,</span>
<span class="s1">&#39;transfusion&#39;</span><span class="p">:</span> <span class="s1">&#39;blood-transfusion&#39;</span><span class="p">,</span>
<span class="s1">&#39;wdbc&#39;</span><span class="p">:</span> <span class="s1">&#39;breast-cancer-wisconsin&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-q-red&#39;</span><span class="p">:</span> <span class="s1">&#39;wine-quality&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-q-white&#39;</span><span class="p">:</span> <span class="s1">&#39;wine-quality&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.1&#39;</span><span class="p">:</span> <span class="s1">&#39;wine&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.2&#39;</span><span class="p">:</span> <span class="s1">&#39;wine&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine.3&#39;</span><span class="p">:</span> <span class="s1">&#39;wine&#39;</span><span class="p">,</span>
<span class="s1">&#39;yeast&#39;</span><span class="p">:</span> <span class="s1">&#39;yeast&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="c1"># the filename is the name of the file within the data_folder indexed by the identifier</span>
<span class="n">file_name</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;acute&#39;</span><span class="p">:</span> <span class="s1">&#39;diagnosis.data&#39;</span><span class="p">,</span>
<span class="s1">&#39;00193&#39;</span><span class="p">:</span> <span class="s1">&#39;CTG.xls&#39;</span><span class="p">,</span>
<span class="s1">&#39;statlog/german&#39;</span><span class="p">:</span> <span class="s1">&#39;german.data-numeric&#39;</span><span class="p">,</span>
<span class="s1">&#39;mammographic-masses&#39;</span><span class="p">:</span> <span class="s1">&#39;mammographic_masses.data&#39;</span><span class="p">,</span>
<span class="s1">&#39;page-blocks&#39;</span><span class="p">:</span> <span class="s1">&#39;page-blocks.data.Z&#39;</span><span class="p">,</span>
<span class="s1">&#39;undocumented/connectionist-bench/sonar&#39;</span><span class="p">:</span> <span class="s1">&#39;sonar.all-data&#39;</span><span class="p">,</span>
<span class="s1">&#39;spect&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;SPECTF.train&#39;</span><span class="p">,</span> <span class="s1">&#39;SPECTF.test&#39;</span><span class="p">],</span>
<span class="s1">&#39;blood-transfusion&#39;</span><span class="p">:</span> <span class="s1">&#39;transfusion.data&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-quality&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;winequality-red.csv&#39;</span><span class="p">,</span> <span class="s1">&#39;winequality-white.csv&#39;</span><span class="p">],</span>
<span class="s1">&#39;breast-cancer-wisconsin&#39;</span><span class="p">:</span> <span class="s1">&#39;breast-cancer-wisconsin.data&#39;</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">&#39;breast-cancer&#39;</span> <span class="k">else</span> <span class="s1">&#39;wdbc.data&#39;</span>
<span class="p">}</span>
<span class="c1"># the filename containing the dataset description (if any)</span>
<span class="n">desc_name</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;acute&#39;</span><span class="p">:</span> <span class="s1">&#39;diagnosis.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;00193&#39;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s1">&#39;statlog/german&#39;</span><span class="p">:</span> <span class="s1">&#39;german.doc&#39;</span><span class="p">,</span>
<span class="s1">&#39;mammographic-masses&#39;</span><span class="p">:</span> <span class="s1">&#39;mammographic_masses.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;undocumented/connectionist-bench/sonar&#39;</span><span class="p">:</span> <span class="s1">&#39;sonar.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;spect&#39;</span><span class="p">:</span> <span class="s1">&#39;SPECTF.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;blood-transfusion&#39;</span><span class="p">:</span> <span class="s1">&#39;transfusion.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;wine-quality&#39;</span><span class="p">:</span> <span class="s1">&#39;winequality.names&#39;</span><span class="p">,</span>
<span class="s1">&#39;breast-cancer-wisconsin&#39;</span><span class="p">:</span> <span class="s1">&#39;breast-cancer-wisconsin.names&#39;</span> <span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;breast-cancer&#39;</span> <span class="k">else</span> <span class="s1">&#39;wdbc.names&#39;</span>
<span class="p">}</span>
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifier_map</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">file_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.data&#39;</span><span class="p">)</span>
<span class="n">descfile</span> <span class="o">=</span> <span class="n">desc_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.names&#39;</span><span class="p">)</span>
<span class="n">fullname</span> <span class="o">=</span> <span class="n">dataset_fullname</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="n">URL</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;http://archive.ics.uci.edu/ml/machine-learning-databases/</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">data_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;uci_datasets&#39;</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> <span class="c1"># filename could be a list of files, in which case it will be processed later</span>
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
<span class="k">if</span> <span class="n">descfile</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;could not read the description file&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;no file description available&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Loading </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;acute&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">&#39;utf-16&#39;</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">float</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)))</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="p">[</span><span class="n">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">)]</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;acute.a&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">6</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;yes&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;acute.b&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">7</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;yes&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;balance-scale&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;balance.1&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;L&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;balance.2&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;B&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;balance.3&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;R&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;breast-cancer-wisconsin&#39;</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">&#39;breast-cancer&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span>
<span class="n">Xy</span><span class="p">[</span><span class="n">Xy</span><span class="o">==</span><span class="s1">&#39;?&#39;</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">Xy</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;breast-cancer-wisconsin&#39;</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">&#39;wdbc&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">:</span><span class="mi">32</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;M&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;cmc&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">8</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;cmc.1&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;cmc.2&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;cmc.3&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;00193&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Data&#39;</span><span class="p">,</span> <span class="n">skipfooter</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">24</span><span class="p">))]</span> <span class="c1"># select columns numbered (number 23 is the target label)</span>
<span class="c1"># replaces the header with the first row</span>
<span class="n">new_header</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># grab the first row for the header</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># take the data less the header row</span>
<span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">new_header</span> <span class="c1"># set the header row as the df header</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">22</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;NSP&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;ctg.1&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># 1==Normal</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;ctg.2&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># 2==Suspect</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;ctg.3&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span> <span class="c1"># 3==Pathologic</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;statlog/german&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;haberman&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;ionosphere&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;b&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;iris&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;iris.1&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;Iris-setosa&#39;</span><span class="p">)</span> <span class="c1"># 1==Setosa</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;iris.2&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;Iris-versicolor&#39;</span><span class="p">)</span> <span class="c1"># 2==Versicolor</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;iris.3&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;Iris-virginica&#39;</span><span class="p">)</span> <span class="c1"># 3==Virginica</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;mammographic-masses&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">df</span><span class="p">[</span><span class="n">df</span> <span class="o">==</span> <span class="s1">&#39;?&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;page-blocks&#39;</span><span class="p">:</span>
<span class="n">data_path_</span> <span class="o">=</span> <span class="n">data_path</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;.Z&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">data_path_</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Warning: file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> does not exist. If this is the first time you &#39;</span>
<span class="sa">f</span><span class="s1">&#39;attempt to load this dataset, then you have to manually unzip the </span><span class="si">{</span><span class="n">data_path</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;and name the extracted file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> (unfortunately, neither zipfile, nor &#39;</span>
<span class="sa">f</span><span class="s1">&#39;gzip can handle unix compressed files automatically -- there is a repo in GitHub &#39;</span>
<span class="sa">f</span><span class="s1">&#39;https://github.com/umeat/unlzw where the problem seems to be solved anyway).&#39;</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path_</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># 5==block &quot;graphic&quot;</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;semeion&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span> <span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">256</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">263</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="c1"># 263 stands for digit 8 (labels are one-hot vectors from col 256-266)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;undocumented/connectionist-bench/sonar&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;R&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;spambase&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;spect&#39;</span><span class="p">:</span>
<span class="n">dfs</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">file</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">file</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
<span class="n">dfs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">))</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">dfs</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">45</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;tic-tac-toe&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;o&#39;</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;b&#39;</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;x&#39;</span><span class="p">,</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;negative&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;blood-transfusion&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;wine&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">14</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;wine.1&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;wine.2&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;wine.3&#39;</span><span class="p">:</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;wine-quality&#39;</span><span class="p">:</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">filename</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">&#39;wine-q-red&#39;</span> <span class="k">else</span> <span class="n">filename</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">&#39;;&#39;</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="o">&gt;</span> <span class="mi">5</span>
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">&#39;yeast&#39;</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">&#39;NUC&#39;</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="k">return</span> <span class="n">data</span></div>
<div class="viewcode-block" id="fetch_UCIMulticlassDataset">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassDataset">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dataset</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. </span>
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
<span class="sd"> - It has more than 1000 instances</span>
<span class="sd"> - It is suited for classification</span>
<span class="sd"> - It has more than two classes</span>
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
<span class="sd"> &gt;&gt;&gt; import quapy as qp</span>
<span class="sd"> &gt;&gt;&gt; dataset = qp.datasets.fetch_UCIMulticlassDataset(&quot;dry-bean&quot;)</span>
<span class="sd"> &gt;&gt;&gt; train, test = dataset.train_test</span>
<span class="sd"> &gt;&gt;&gt; ...</span>
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
<span class="sd"> :param dataset_name: a dataset name</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
<div class="viewcode-block" id="fetch_UCIMulticlassLabelledCollection">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassLabelledCollection">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LabelledCollection</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.</span>
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
<span class="sd"> - It has more than 1000 instances</span>
<span class="sd"> - It is suited for classification</span>
<span class="sd"> - It has more than two classes</span>
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
<span class="sd"> </span>
<span class="sd"> &gt;&gt;&gt; import quapy as qp</span>
<span class="sd"> &gt;&gt;&gt; collection = qp.datasets.fetch_UCIMulticlassLabelledCollection(&quot;dry-bean&quot;)</span>
<span class="sd"> &gt;&gt;&gt; X, y = collection.Xy</span>
<span class="sd"> &gt;&gt;&gt; ...</span>
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
<span class="sd"> :param dataset_name: a dataset name</span>
<span class="sd"> :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;UCI Machine Learning datasets repository (multiclass). &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;Valid ones are </span><span class="si">{</span><span class="n">UCI_MULTICLASS_DATASETS</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">identifiers</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;dry-bean&quot;</span><span class="p">:</span> <span class="mi">602</span><span class="p">,</span>
<span class="s2">&quot;wine-quality&quot;</span><span class="p">:</span> <span class="mi">186</span><span class="p">,</span>
<span class="s2">&quot;academic-success&quot;</span><span class="p">:</span> <span class="mi">697</span><span class="p">,</span>
<span class="s2">&quot;digits&quot;</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span>
<span class="s2">&quot;letter&quot;</span><span class="p">:</span> <span class="mi">59</span>
<span class="p">}</span>
<span class="n">full_names</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;dry-bean&quot;</span><span class="p">:</span> <span class="s2">&quot;Dry Bean Dataset&quot;</span><span class="p">,</span>
<span class="s2">&quot;wine-quality&quot;</span><span class="p">:</span> <span class="s2">&quot;Wine Quality&quot;</span><span class="p">,</span>
<span class="s2">&quot;academic-success&quot;</span><span class="p">:</span> <span class="s2">&quot;Predict students&#39; dropout and academic success&quot;</span><span class="p">,</span>
<span class="s2">&quot;digits&quot;</span><span class="p">:</span> <span class="s2">&quot;Optical Recognition of Handwritten Digits&quot;</span><span class="p">,</span>
<span class="s2">&quot;letter&quot;</span><span class="p">:</span> <span class="s2">&quot;Letter Recognition&quot;</span>
<span class="p">}</span>
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifiers</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="n">fullname</span> <span class="o">=</span> <span class="n">full_names</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Loading UCI Muticlass </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span>
<span class="n">file</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;uci_multiclass&#39;</span><span class="p">,</span> <span class="n">dataset_name</span><span class="o">+</span><span class="s1">&#39;.pkl&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">download</span><span class="p">(</span><span class="nb">id</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_ucirepo</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="nb">id</span><span class="p">)</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">][</span><span class="s1">&#39;features&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(),</span> <span class="n">data</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">][</span><span class="s1">&#39;targets&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">searchsorted</span><span class="p">(</span><span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">download</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="k">return</span> <span class="n">data</span></div>
<span class="k">def</span> <span class="nf">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">repl</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;yes&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;no&#39;</span><span class="p">:</span><span class="mi">0</span><span class="p">},</span> <span class="n">astype</span><span class="o">=</span><span class="nb">float</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span><span class="n">repl</span><span class="p">[</span><span class="n">x</span><span class="p">])</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">astype</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<div class="viewcode-block" id="fetch_lequa2022">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_lequa2022">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_lequa2022</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads the official datasets provided for the `LeQua &lt;https://lequa2022.github.io/index&gt;`_ competition.</span>
<span class="sd"> In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification</span>
<span class="sd"> problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.</span>
<span class="sd"> Tasks T1A and T2A are binary sentiment quantification problems, while T2A and T2B are multiclass quantification</span>
<span class="sd"> problems consisting of estimating the class prevalence values of 28 different merchandise products.</span>
<span class="sd"> We refer to the `Esuli, A., Moreo, A., Sebastiani, F., &amp; Sperduti, G. (2022).</span>
<span class="sd"> A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.</span>
<span class="sd"> &lt;https://ceur-ws.org/Vol-3180/paper-146.pdf&gt;`_ for a detailed description</span>
<span class="sd"> on the tasks and datasets.</span>
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
<span class="sd"> See `lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these</span>
<span class="sd"> datasets.</span>
<span class="sd"> :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of</span>
<span class="sd"> :class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,</span>
<span class="sd"> that return a series of samples stored in a directory which are labelled by prevalence.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">quapy.data._lequa2022</span> <span class="kn">import</span> <span class="n">load_raw_documents</span><span class="p">,</span> <span class="n">load_vector_documents</span><span class="p">,</span> <span class="n">SamplesFromDir</span>
<span class="k">assert</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;Unknown task </span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">. Valid ones are </span><span class="si">{</span><span class="n">LEQUA2022_TASKS</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">URL_TRAINDEV</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.train_dev.zip&#39;</span>
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test.zip&#39;</span>
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test_prevalences.zip&#39;</span>
<span class="n">lequa_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;lequa2022&#39;</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span> <span class="o">+</span> <span class="s1">&#39;_tmp.zip&#39;</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">)):</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TRAINDEV</span><span class="p">)</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
<span class="k">if</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;T1A&#39;</span><span class="p">,</span> <span class="s1">&#39;T1B&#39;</span><span class="p">]:</span>
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_vector_documents</span>
<span class="k">elif</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;T2A&#39;</span><span class="p">,</span> <span class="s1">&#39;T2B&#39;</span><span class="p">]:</span>
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_raw_documents</span>
<span class="n">tr_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">&#39;public&#39;</span><span class="p">,</span> <span class="s1">&#39;training_data.txt&#39;</span><span class="p">)</span>
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">tr_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
<span class="n">val_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">&#39;public&#39;</span><span class="p">,</span> <span class="s1">&#39;dev_samples&#39;</span><span class="p">)</span>
<span class="n">val_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">&#39;public&#39;</span><span class="p">,</span> <span class="s1">&#39;dev_prevalences.txt&#39;</span><span class="p">)</span>
<span class="n">val_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">val_samples_path</span><span class="p">,</span> <span class="n">val_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">&#39;public&#39;</span><span class="p">,</span> <span class="s1">&#39;test_samples&#39;</span><span class="p">)</span>
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">&#39;public&#39;</span><span class="p">,</span> <span class="s1">&#39;test_prevalences.txt&#39;</span><span class="p">)</span>
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">val_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
<div class="viewcode-block" id="fetch_IFCB">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_IFCB">[docs]</a>
<span class="k">def</span> <span class="nf">fetch_IFCB</span><span class="p">(</span><span class="n">single_sample_train</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads the IFCB dataset for quantification from `Zenodo &lt;https://zenodo.org/records/10036244&gt;`_ (for more</span>
<span class="sd"> information on this dataset, please follow the zenodo link).</span>
<span class="sd"> This dataset is based on the data available publicly at</span>
<span class="sd"> `WHOI-Plankton repo &lt;https://github.com/hsosik/WHOI-Plankton&gt;`_.</span>
<span class="sd"> The scripts for the processing are available at `P. González&#39;s repo &lt;https://github.com/pglez82/IFCB_Zenodo&gt;`_.</span>
<span class="sd"> Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.</span>
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
<span class="sd"> :param single_sample_train: a boolean. If true, it will return the train dataset as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (all examples together).</span>
<span class="sd"> If false, a generator of training samples will be returned. Each example in the training set has an individual label.</span>
<span class="sd"> :param for_model_selection: if True, then returns a split 30% of the training set (86 out of 286 samples) to be used for model selection; </span>
<span class="sd"> if False, then returns the full training set as training set and the test set as the test set</span>
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
<span class="sd"> ~/quay_data/ directory)</span>
<span class="sd"> :return: a tuple `(train, test_gen)` where `train` is an instance of</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, if `single_sample_train` is true or</span>
<span class="sd"> :class:`quapy.data._ifcb.IFCBTrainSamplesFromDir`, i.e. a sampling protocol that returns a series of samples</span>
<span class="sd"> labelled example by example. test_gen will be a :class:`quapy.data._ifcb.IFCBTestSamples`, </span>
<span class="sd"> i.e., a sampling protocol that returns a series of samples labelled by prevalence.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">quapy.data._ifcb</span> <span class="kn">import</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">,</span> <span class="n">IFCBTestSamples</span><span class="p">,</span> <span class="n">get_sample_list</span><span class="p">,</span> <span class="n">generate_modelselection_split</span>
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
<span class="n">URL_TRAIN</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/records/10036244/files/IFCB.train.zip&#39;</span>
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/records/10036244/files/IFCB.test.zip&#39;</span>
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;https://zenodo.org/records/10036244/files/IFCB.test_prevalences.zip&#39;</span>
<span class="n">ifcb_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">&#39;ifcb&#39;</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">&#39;ifcb_tmp.zip&#39;</span><span class="p">)</span>
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">&#39;train&#39;</span><span class="p">)):</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TRAIN</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">&#39;test&#39;</span><span class="p">)):</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">&#39;test_prevalences.csv&#39;</span><span class="p">)):</span>
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
<span class="c1"># Load test prevalences and classes</span>
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">&#39;test_prevalences.csv&#39;</span><span class="p">)</span>
<span class="n">test_true_prev</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">test_true_prev_path</span><span class="p">)</span>
<span class="n">classes</span> <span class="o">=</span> <span class="n">test_true_prev</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
<span class="c1">#Load train and test samples</span>
<span class="n">train_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">&#39;train&#39;</span><span class="p">)</span>
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">&#39;test&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
<span class="c1"># In this case, return 70% of training data as the training set and 30% as the test set</span>
<span class="n">samples</span> <span class="o">=</span> <span class="n">get_sample_list</span><span class="p">(</span><span class="n">train_samples_path</span><span class="p">)</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">generate_modelselection_split</span><span class="p">(</span><span class="n">samples</span><span class="p">,</span> <span class="n">split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
<span class="c1"># Test prevalence is computed from class labels</span>
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">test</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># In this case, we use all training samples as the training set and the test samples as the test set</span>
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="n">test_true_prev</span><span class="p">)</span>
<span class="c1"># In the case the user wants it, join all the train samples in one LabelledCollection</span>
<span class="k">if</span> <span class="n">single_sample_train</span><span class="p">:</span>
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">train_gen</span><span class="p">()])</span>
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">test_gen</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">train_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,373 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data.preprocessing &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data.preprocessing</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data.preprocessing</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">spmatrix</span>
<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <span class="n">TfidfVectorizer</span><span class="p">,</span> <span class="n">CountVectorizer</span>
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="kn">import</span> <span class="n">StandardScaler</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">map_parallel</span>
<span class="kn">from</span> <span class="nn">.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<div class="viewcode-block" id="text2tfidf">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.text2tfidf">[docs]</a>
<span class="k">def</span> <span class="nf">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span><span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of</span>
<span class="sd"> tfidf weighted sparse vectors</span>
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are</span>
<span class="sd"> lists of str</span>
<span class="sd"> :param min_df: minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</span>
<span class="sd"> :param sublinear_tf: whether or not to apply the log scalling to the tf counters (default True)</span>
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn&#39;s</span>
<span class="sd"> `TfidfVectorizer &lt;https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html&gt;`_)</span>
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` in `csr_matrix` format (if inplace=False) or a reference to the</span>
<span class="sd"> current Dataset (if inplace=True) where the instances are stored in a `csr_matrix` of real-valued tfidf scores</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">vectorizer</span> <span class="o">=</span> <span class="n">TfidfVectorizer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="n">sublinear_tf</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="n">training_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">test_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span>
<span class="k">return</span> <span class="n">dataset</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
<div class="viewcode-block" id="reduce_columns">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.reduce_columns">[docs]</a>
<span class="k">def</span> <span class="nf">reduce_columns</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reduces the dimensionality of the instances, represented as a `csr_matrix` (or any subtype of</span>
<span class="sd"> `scipy.sparse.spmatrix`), of training and test documents by removing the columns of words which are not present</span>
<span class="sd"> in at least `min_df` instances in the training set</span>
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` in which instances are represented in sparse format (any</span>
<span class="sd"> subtype of scipy.sparse.spmatrix)</span>
<span class="sd"> :param min_df: integer, minimum number of instances below which the columns are removed</span>
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) where the dimensions corresponding to infrequent terms</span>
<span class="sd"> in the training set have been removed</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;unaligned vector spaces&#39;</span>
<span class="k">def</span> <span class="nf">filter_by_occurrences</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W</span><span class="p">):</span>
<span class="n">column_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">((</span><span class="n">X</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="n">take_columns</span> <span class="o">=</span> <span class="n">column_prevalence</span> <span class="o">&gt;=</span> <span class="n">min_df</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
<span class="n">W</span> <span class="o">=</span> <span class="n">W</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">W</span>
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xte</span> <span class="o">=</span> <span class="n">filter_by_occurrences</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xtr</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xte</span>
<span class="k">return</span> <span class="n">dataset</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xte</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
<div class="viewcode-block" id="standardize">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.standardize">[docs]</a>
<span class="k">def</span> <span class="nf">standardize</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Standardizes the real-valued columns of a :class:`quapy.data.base.Dataset`.</span>
<span class="sd"> Standardization, aka z-scoring, of a variable `X` comes down to subtracting the average and normalizing by the</span>
<span class="sd"> standard deviation.</span>
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object</span>
<span class="sd"> :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new</span>
<span class="sd"> :class:`quapy.data.base.Dataset` is to be returned</span>
<span class="sd"> :return: an instance of :class:`quapy.data.base.Dataset`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">StandardScaler</span><span class="p">(</span><span class="n">copy</span><span class="o">=</span><span class="ow">not</span> <span class="n">inplace</span><span class="p">)</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="k">return</span> <span class="n">dataset</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="index">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.index">[docs]</a>
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indexes the tokens of a textual :class:`quapy.data.base.Dataset` of string documents.</span>
<span class="sd"> To index a document means to replace each different token by a unique numerical index.</span>
<span class="sd"> Rare words (i.e., words occurring less than `min_df` times) are replaced by a special token `UNK`</span>
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object where the instances of training and test documents</span>
<span class="sd"> are lists of str</span>
<span class="sd"> :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index</span>
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn&#39;s</span>
<span class="sd"> `CountVectorizer &lt;https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html&gt;_`)</span>
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer values representing indices.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">indexer</span> <span class="o">=</span> <span class="n">IndexTransformer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="n">training_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">test_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">training_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">test_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span>
<span class="k">return</span> <span class="n">dataset</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">__check_type</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">element_type</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="n">container_type</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;unexpected type of container (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)&#39;</span>
<span class="k">if</span> <span class="n">element_type</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">element_type</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;unexpected type of element (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)&#39;</span>
<div class="viewcode-block" id="IndexTransformer">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer">[docs]</a>
<span class="k">class</span> <span class="nc">IndexTransformer</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This class implements a sklearn&#39;s-style transformer that indexes text as numerical ids for the tokens it</span>
<span class="sd"> contains, and that would be generated by sklearn&#39;s</span>
<span class="sd"> `CountVectorizer &lt;https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html&gt;`_</span>
<span class="sd"> :param kwargs: keyworded arguments from</span>
<span class="sd"> `CountVectorizer &lt;https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html&gt;`_</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span> <span class="o">=</span> <span class="n">CountVectorizer</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># a valid index is assigned after fit</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span> <span class="c1"># a valid index is assigned after fit</span>
<div class="viewcode-block" id="IndexTransformer.fit">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</span>
<span class="sd"> :param X: a list of strings</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">build_analyzer</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">vocabulary_</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;UNK_TOKEN&#39;</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;UNK_INDEX&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;PAD_TOKEN&#39;</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;PAD_INDEX&#39;</span><span class="p">])</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="IndexTransformer.transform">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.transform">[docs]</a>
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transforms the strings in `X` as lists of numerical ids</span>
<span class="sd"> :param X: a list of strings</span>
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># given the number of tasks and the number of jobs, generates the slices for the parallel processes</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">!=</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;transform called before fit&#39;</span>
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_index</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">documents</span><span class="p">):</span>
<span class="n">vocab</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">return</span> <span class="p">[[</span><span class="n">vocab</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span><span class="p">)</span> <span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span><span class="p">(</span><span class="n">doc</span><span class="p">)]</span> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="s1">&#39;indexing&#39;</span><span class="p">)]</span>
<div class="viewcode-block" id="IndexTransformer.fit_transform">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit_transform">[docs]</a>
<span class="k">def</span> <span class="nf">fit_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits the transform on `X` and transforms it.</span>
<span class="sd"> :param X: a list of strings</span>
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
<div class="viewcode-block" id="IndexTransformer.vocabulary_size">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.vocabulary_size">[docs]</a>
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the length of the vocabulary according to which the document tokens have been indexed</span>
<span class="sd"> :return: integer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
<div class="viewcode-block" id="IndexTransformer.add_word">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.add_word">[docs]</a>
<span class="k">def</span> <span class="nf">add_word</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">,</span> <span class="nb">id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nogaps</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.</span>
<span class="sd"> Useful to define special tokens for codifying unknown words, or padding tokens.</span>
<span class="sd"> :param word: string, surface form of the token</span>
<span class="sd"> :param id: integer, numerical value to assign to the token (leave as None for indicating the next valid id,</span>
<span class="sd"> default)</span>
<span class="sd"> :param nogaps: if set to True (default) asserts that the id indicated leads to no numerical gaps with</span>
<span class="sd"> precedent ids stored so far</span>
<span class="sd"> :return: integer, the numerical id for the new token</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> already in dictionary&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">id</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># add the word with the next id</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">id2word</span> <span class="o">=</span> <span class="p">{</span><span class="n">id_</span><span class="p">:</span><span class="n">word_</span> <span class="k">for</span> <span class="n">word_</span><span class="p">,</span> <span class="n">id_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="k">if</span> <span class="nb">id</span> <span class="ow">in</span> <span class="n">id2word</span><span class="p">:</span>
<span class="n">old_word</span> <span class="o">=</span> <span class="n">id2word</span><span class="p">[</span><span class="nb">id</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">id</span>
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">old_word</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">old_word</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">nogaps</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">id</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> added with id </span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">, while the current vocabulary size &#39;</span>
<span class="sa">f</span><span class="s1">&#39;is of </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="si">}</span><span class="s1">, and id gaps are not allowed&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,244 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.data.reader &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.data.reader</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.data.reader</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">dok_matrix</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<div class="viewcode-block" id="from_text">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_text">[docs]</a>
<span class="k">def</span> <span class="nf">from_text</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">&#39;utf-8&#39;</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">class2int</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reads a labelled colletion of documents.</span>
<span class="sd"> File fomart &lt;0 or 1&gt;\t&lt;document&gt;\n</span>
<span class="sd"> :param path: path to the labelled collection</span>
<span class="sd"> :param encoding: the text encoding used to open the file</span>
<span class="sd"> :param verbose: if &gt;0 (default) shows some progress information in standard output</span>
<span class="sd"> :return: a list of sentences, and a list of labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">if</span> <span class="n">verbose</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">:</span>
<span class="n">file</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">&#39;loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">()</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">file</span><span class="p">:</span>
<span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="k">if</span> <span class="n">line</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">label</span><span class="p">,</span> <span class="n">sentence</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">sentence</span> <span class="o">=</span> <span class="n">sentence</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="k">if</span> <span class="n">class2int</span><span class="p">:</span>
<span class="n">label</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sentence</span><span class="p">:</span>
<span class="n">all_sentences</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sentence</span><span class="p">)</span>
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;format error in </span><span class="si">{</span><span class="n">line</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span></div>
<div class="viewcode-block" id="from_sparse">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_sparse">[docs]</a>
<span class="k">def</span> <span class="nf">from_sparse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reads a labelled collection of real-valued instances expressed in sparse format</span>
<span class="sd"> File format &lt;-1 or 0 or 1&gt;[\s col(int):val(float)]\n</span>
<span class="sd"> :param path: path to the labelled collection</span>
<span class="sd"> :return: a `csr_matrix` containing the instances (rows), and a ndarray containing the labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">):</span>
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">col_val</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;:&#39;</span><span class="p">)</span>
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
<span class="k">return</span> <span class="n">col</span><span class="p">,</span> <span class="n">val</span>
<span class="n">all_documents</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="n">max_col</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">&#39;loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">):</span>
<span class="n">parts</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
<span class="k">if</span> <span class="n">parts</span><span class="p">:</span>
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_val</span> <span class="ow">in</span> <span class="n">parts</span><span class="p">[</span><span class="mi">1</span><span class="p">:]])</span>
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">cols</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">vals</span><span class="p">)</span>
<span class="n">max_col</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">max_col</span><span class="p">,</span> <span class="n">cols</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
<span class="n">all_documents</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">))</span>
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">dok_matrix</span><span class="p">((</span><span class="n">n_docs</span><span class="p">,</span> <span class="n">max_col</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">)</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">enumerate</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span> <span class="n">total</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span>
<span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;\-- filling matrix of shape </span><span class="si">{</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">):</span>
<span class="n">X</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">cols</span><span class="p">]</span> <span class="o">=</span> <span class="n">vals</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">tocsr</span><span class="p">()</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
<div class="viewcode-block" id="from_csv">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_csv">[docs]</a>
<span class="k">def</span> <span class="nf">from_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">&#39;utf-8&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reads a csv file in which columns are separated by &#39;,&#39;.</span>
<span class="sd"> File format &lt;label&gt;,&lt;feat1&gt;,&lt;feat2&gt;,...,&lt;featn&gt;\n</span>
<span class="sd"> :param path: path to the csv file</span>
<span class="sd"> :param encoding: the text encoding used to open the file</span>
<span class="sd"> :return: a np.ndarray for the labels and a ndarray (float) for the covariates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">instance</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">&#39;rt&#39;</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;reading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">):</span>
<span class="n">yi</span><span class="p">,</span> <span class="o">*</span><span class="n">xi</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)</span>
<span class="n">X</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span><span class="n">xi</span><span class="p">)))</span>
<span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
<div class="viewcode-block" id="reindex_labels">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.reindex_labels">[docs]</a>
<span class="k">def</span> <span class="nf">reindex_labels</span><span class="p">(</span><span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.</span>
<span class="sd"> E.g.:</span>
<span class="sd"> &gt;&gt;&gt; reindex_labels([&#39;B&#39;, &#39;B&#39;, &#39;A&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; (array([1, 1, 0, 2]), array([&#39;A&#39;, &#39;B&#39;, &#39;C&#39;], dtype=&#39;&lt;U1&#39;))</span>
<span class="sd"> :param y: the list or array of original labels</span>
<span class="sd"> :return: a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
<span class="n">classnames</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span>
<span class="n">label2index</span> <span class="o">=</span> <span class="p">{</span><span class="n">label</span><span class="p">:</span> <span class="n">index</span> <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classnames</span><span class="p">)}</span>
<span class="n">indexed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
<span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">classnames</span><span class="p">:</span>
<span class="n">indexed</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">label</span><span class="p">]</span> <span class="o">=</span> <span class="n">label2index</span><span class="p">[</span><span class="n">label</span><span class="p">]</span>
<span class="k">return</span> <span class="n">indexed</span><span class="p">,</span> <span class="n">classnames</span></div>
<div class="viewcode-block" id="binarize">
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.binarize">[docs]</a>
<span class="k">def</span> <span class="nf">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Binarizes a categorical array-like collection of labels towards the positive class `pos_class`. E.g.,:</span>
<span class="sd"> &gt;&gt;&gt; binarize([1, 2, 3, 1, 1, 0], pos_class=2)</span>
<span class="sd"> &gt;&gt;&gt; array([0, 1, 0, 0, 0, 0])</span>
<span class="sd"> :param y: array-like of labels</span>
<span class="sd"> :param pos_class: integer, the positive class</span>
<span class="sd"> :return: a binary np.ndarray, in which values 1 corresponds to positions in whcih `y` had `pos_class` labels, and</span>
<span class="sd"> 0 otherwise</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
<span class="n">ybin</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
<span class="n">ybin</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">pos_class</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">ybin</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,486 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.error &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.error</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.error</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of error measures used for quantification&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<div class="viewcode-block" id="from_name">
<a class="viewcode-back" href="../../quapy.html#quapy.error.from_name">[docs]</a>
<span class="k">def</span> <span class="nf">from_name</span><span class="p">(</span><span class="n">err_name</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gets an error function from its name. E.g., `from_name(&quot;mae&quot;)`</span>
<span class="sd"> will return function :meth:`quapy.error.mae`</span>
<span class="sd"> :param err_name: string, the error name</span>
<span class="sd"> :return: a callable implementing the requested error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">err_name</span> <span class="ow">in</span> <span class="n">ERROR_NAMES</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;unknown error </span><span class="si">{</span><span class="n">err_name</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">callable_error</span> <span class="o">=</span> <span class="nb">globals</span><span class="p">()[</span><span class="n">err_name</span><span class="p">]</span>
<span class="k">return</span> <span class="n">callable_error</span></div>
<div class="viewcode-block" id="f1e">
<a class="viewcode-back" href="../../quapy.html#quapy.error.f1e">[docs]</a>
<span class="k">def</span> <span class="nf">f1e</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;F1 error: simply computes the error in terms of macro :math:`F_1`, i.e.,</span>
<span class="sd"> :math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall,</span>
<span class="sd"> defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing</span>
<span class="sd"> for true positives, false positives, and false negatives, respectively.</span>
<span class="sd"> `Macro` averaging means the :math:`F_1` is computed for each category independently,</span>
<span class="sd"> and then averaged.</span>
<span class="sd"> :param y_true: array-like of true labels</span>
<span class="sd"> :param y_pred: array-like of predicted labels</span>
<span class="sd"> :return: :math:`1-F_1^M`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">&#39;macro&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="acce">
<a class="viewcode-back" href="../../quapy.html#quapy.error.acce">[docs]</a>
<span class="k">def</span> <span class="nf">acce</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the error in terms of 1-accuracy. The accuracy is computed as</span>
<span class="sd"> :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing</span>
<span class="sd"> for true positives, false positives, false negatives, and true negatives,</span>
<span class="sd"> respectively</span>
<span class="sd"> :param y_true: array-like of true labels</span>
<span class="sd"> :param y_pred: array-like of predicted labels</span>
<span class="sd"> :return: 1-accuracy</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="p">(</span><span class="n">y_true</span> <span class="o">==</span> <span class="n">y_pred</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="mae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mae">[docs]</a>
<span class="k">def</span> <span class="nf">mae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :return: mean absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="ae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.ae">[docs]</a>
<span class="k">def</span> <span class="nf">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the absolute error between the two prevalence vectors.</span>
<span class="sd"> Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
<span class="sd"> :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,</span>
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :return: absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
<div class="viewcode-block" id="nae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.nae">[docs]</a>
<span class="k">def</span> <span class="nf">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the normalized absolute error between the two prevalence vectors.</span>
<span class="sd"> Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
<span class="sd"> :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,</span>
<span class="sd"> where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`</span>
<span class="sd"> are the classes of interest.</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :return: normalized absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)))</span></div>
<div class="viewcode-block" id="mnae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mnae">[docs]</a>
<span class="k">def</span> <span class="nf">mnae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :return: mean normalized absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="mse">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mse">[docs]</a>
<span class="k">def</span> <span class="nf">mse</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the</span>
<span class="sd"> true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the</span>
<span class="sd"> predicted prevalence values</span>
<span class="sd"> :return: mean squared error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="se">
<a class="viewcode-back" href="../../quapy.html#quapy.error.se">[docs]</a>
<span class="k">def</span> <span class="nf">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the squared error between the two prevalence vectors.</span>
<span class="sd"> Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
<span class="sd"> :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,</span>
<span class="sd"> where</span>
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :return: absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">((</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
<div class="viewcode-block" id="mkld">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mkld">[docs]</a>
<span class="k">def</span> <span class="nf">mkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the</span>
<span class="sd"> sample pairs. The distributions are smoothed using the `eps` factor</span>
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
<span class="sd"> (which has thus to be set beforehand).</span>
<span class="sd"> :return: mean Kullback-Leibler distribution</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="kld">
<a class="viewcode-back" href="../../quapy.html#quapy.error.kld">[docs]</a>
<span class="k">def</span> <span class="nf">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the Kullback-Leibler divergence between the two prevalence distributions.</span>
<span class="sd"> Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`</span>
<span class="sd"> is computed as</span>
<span class="sd"> :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=</span>
<span class="sd"> \\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,</span>
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
<span class="sd"> (which has thus to be set beforehand).</span>
<span class="sd"> :return: Kullback-Leibler divergence between the two distributions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
<span class="n">smooth_prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span>
<span class="n">smooth_prevs_hat</span> <span class="o">=</span> <span class="n">prevs_hat</span> <span class="o">+</span> <span class="n">eps</span>
<span class="k">return</span> <span class="p">(</span><span class="n">smooth_prevs</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">smooth_prevs</span><span class="o">/</span><span class="n">smooth_prevs_hat</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
<div class="viewcode-block" id="mnkld">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mnkld">[docs]</a>
<span class="k">def</span> <span class="nf">mnkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)</span>
<span class="sd"> across the sample pairs. The distributions are smoothed using the `eps` factor</span>
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain</span>
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
<span class="sd"> (which has thus to be set beforehand).</span>
<span class="sd"> :return: mean Normalized Kullback-Leibler distribution</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="nkld">
<a class="viewcode-back" href="../../quapy.html#quapy.error.nkld">[docs]</a>
<span class="k">def</span> <span class="nf">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</span>
<span class="sd"> Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and</span>
<span class="sd"> :math:`\\hat{p}` is computed as</span>
<span class="sd"> math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,</span>
<span class="sd"> where</span>
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions</span>
<span class="sd"> contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample</span>
<span class="sd"> size. If `eps=None`, the sample size will be taken from the environment variable</span>
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
<span class="sd"> :return: Normalized Kullback-Leibler divergence between the two distributions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ekld</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">))</span>
<span class="k">return</span> <span class="mf">2.</span> <span class="o">*</span> <span class="n">ekld</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">ekld</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span></div>
<div class="viewcode-block" id="mrae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mrae">[docs]</a>
<span class="k">def</span> <span class="nf">mrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across</span>
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
<span class="sd"> :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param eps: smoothing factor. `mrae` is not defined in cases in which the true</span>
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
<span class="sd"> :return: mean relative absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="rae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.rae">[docs]</a>
<span class="k">def</span> <span class="nf">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the absolute relative error between the two prevalence vectors.</span>
<span class="sd"> Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
<span class="sd"> is computed as</span>
<span class="sd"> :math:`RAE(p,\\hat{p})=</span>
<span class="sd"> \\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,</span>
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution</span>
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
<span class="sd"> :return: relative absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
<div class="viewcode-block" id="nrae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.nrae">[docs]</a>
<span class="k">def</span> <span class="nf">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the normalized absolute relative error between the two prevalence vectors.</span>
<span class="sd"> Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
<span class="sd"> is computed as</span>
<span class="sd"> :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,</span>
<span class="sd"> where</span>
<span class="sd"> :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`</span>
<span class="sd"> and :math:`\\mathcal{Y}` are the classes of interest.</span>
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
<span class="sd"> :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution</span>
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
<span class="sd"> :return: normalized relative absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
<span class="n">min_p</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="o">+</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">min_p</span><span class="p">)</span><span class="o">/</span><span class="n">min_p</span><span class="p">)</span></div>
<div class="viewcode-block" id="mnrae">
<a class="viewcode-back" href="../../quapy.html#quapy.error.mnrae">[docs]</a>
<span class="k">def</span> <span class="nf">mnrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across</span>
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
<span class="sd"> :meth:`quapy.error.smooth`).</span>
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
<span class="sd"> prevalence values</span>
<span class="sd"> :param eps: smoothing factor. `mnrae` is not defined in cases in which the true</span>
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
<span class="sd"> :return: mean normalized relative absolute error</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="smooth">
<a class="viewcode-back" href="../../quapy.html#quapy.error.smooth">[docs]</a>
<span class="k">def</span> <span class="nf">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:</span>
<span class="sd"> :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+</span>
<span class="sd"> \\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`</span>
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
<span class="sd"> :param eps: smoothing factor</span>
<span class="sd"> :return: array-like of shape `(n_classes,)` with the smoothed distribution</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
<span class="k">return</span> <span class="p">(</span><span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">eps</span> <span class="o">*</span> <span class="n">n_classes</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="n">eps</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;eps was not defined, and qp.environ[&quot;SAMPLE_SIZE&quot;] was not set&#39;</span><span class="p">)</span>
<span class="n">eps</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">/</span> <span class="p">(</span><span class="mf">2.</span> <span class="o">*</span> <span class="n">sample_size</span><span class="p">)</span>
<span class="k">return</span> <span class="n">eps</span>
<span class="n">CLASSIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">f1e</span><span class="p">,</span> <span class="n">acce</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">mae</span><span class="p">,</span> <span class="n">mnae</span><span class="p">,</span> <span class="n">mrae</span><span class="p">,</span> <span class="n">mnrae</span><span class="p">,</span> <span class="n">mse</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR_SINGLE</span> <span class="o">=</span> <span class="p">{</span><span class="n">ae</span><span class="p">,</span> <span class="n">nae</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">se</span><span class="p">,</span> <span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR_SMOOTH</span> <span class="o">=</span> <span class="p">{</span><span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">,</span> <span class="n">mrae</span><span class="p">}</span>
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">CLASSIFICATION_ERROR</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">}</span>
<span class="n">QUANTIFICATION_ERROR_SMOOTH_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SMOOTH</span><span class="p">}</span>
<span class="n">ERROR_NAMES</span> <span class="o">=</span> \
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
<span class="n">f1_error</span> <span class="o">=</span> <span class="n">f1e</span>
<span class="n">acc_error</span> <span class="o">=</span> <span class="n">acce</span>
<span class="n">mean_absolute_error</span> <span class="o">=</span> <span class="n">mae</span>
<span class="n">absolute_error</span> <span class="o">=</span> <span class="n">ae</span>
<span class="n">mean_relative_absolute_error</span> <span class="o">=</span> <span class="n">mrae</span>
<span class="n">relative_absolute_error</span> <span class="o">=</span> <span class="n">rae</span>
<span class="n">normalized_absolute_error</span> <span class="o">=</span> <span class="n">nae</span>
<span class="n">normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">nrae</span>
<span class="n">mean_normalized_absolute_error</span> <span class="o">=</span> <span class="n">mnae</span>
<span class="n">mean_normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">mnrae</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,302 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.evaluation &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.evaluation</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.evaluation</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Iterable</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">,</span> <span class="n">IterateProtocol</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<div class="viewcode-block" id="prediction">
<a class="viewcode-back" href="../../quapy.html#quapy.evaluation.prediction">[docs]</a>
<span class="k">def</span> <span class="nf">prediction</span><span class="p">(</span>
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;auto&#39;</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Uses a quantification model to generate predictions for the samples generated via a specific protocol.</span>
<span class="sd"> This function is central to all evaluation processes, and is endowed with an optimization to speed-up the</span>
<span class="sd"> prediction of protocols that generate samples from a large collection. The optimization applies to aggregative</span>
<span class="sd"> quantifiers only, and to OnLabelledCollectionProtocol protocols, and comes down to generating the classification</span>
<span class="sd"> predictions once and for all, and then generating samples over the classification predictions (instead of over</span>
<span class="sd"> the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by</span>
<span class="sd"> setting `aggr_speedup` to &#39;auto&#39; or True, and is only carried out if the overall process is convenient in terms</span>
<span class="sd"> of computations (e.g., if the number of classification predictions needed for the original collection exceed the</span>
<span class="sd"> number of classification predictions needed for all samples, then the optimization is not undertaken).</span>
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
<span class="sd"> in charge of generating the samples for which the model has to issue class prevalence predictions.</span>
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to &quot;force&quot; for applying it even if the number of</span>
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
<span class="sd"> in the samples to be generated. Set to True or &quot;auto&quot; (default) for letting QuaPy decide whether it is</span>
<span class="sd"> convenient or not. Set to False to deactivate.</span>
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
<span class="sd"> :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape</span>
<span class="sd"> `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">&#39;auto&#39;</span><span class="p">,</span> <span class="s1">&#39;force&#39;</span><span class="p">],</span> <span class="s1">&#39;invalid value for aggr_speedup&#39;</span>
<span class="n">sout</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="kc">None</span>
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="s1">&#39;auto&#39;</span><span class="p">,</span> <span class="s1">&#39;force&#39;</span><span class="p">]:</span>
<span class="c1"># checks whether the prediction can be made more efficiently; this check consists in verifying if the model is</span>
<span class="c1"># of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to</span>
<span class="c1"># classify using the protocol would exceed the number of test documents in the original collection</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeQuantifier</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="o">==</span> <span class="s1">&#39;force&#39;</span><span class="p">:</span>
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;forcing aggregative speedup&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="s1">&#39;sample_size&#39;</span><span class="p">):</span>
<span class="n">nD</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
<span class="n">samplesD</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">()</span> <span class="o">*</span> <span class="n">protocol</span><span class="o">.</span><span class="n">sample_size</span>
<span class="k">if</span> <span class="n">nD</span> <span class="o">&lt;</span> <span class="n">samplesD</span><span class="p">:</span>
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;speeding up the prediction for the aggregative quantifier, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;total classifications </span><span class="si">{</span><span class="n">nD</span><span class="si">}</span><span class="s1"> instead of </span><span class="si">{</span><span class="n">samplesD</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">apply_optimization</span><span class="p">:</span>
<span class="n">pre_classified</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">()</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">protocol_with_predictions</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classified</span><span class="p">)</span>
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">aggregate</span><span class="p">,</span> <span class="n">protocol_with_predictions</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">__prediction_helper</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">,</span> <span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">sample_instances</span><span class="p">,</span> <span class="n">sample_prev</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">protocol</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="s1">&#39;predicting&#39;</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="n">protocol</span><span class="p">():</span>
<span class="n">estim_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">(</span><span class="n">sample_instances</span><span class="p">))</span>
<span class="n">true_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sample_prev</span><span class="p">)</span>
<span class="n">true_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">)</span>
<span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span>
<div class="viewcode-block" id="evaluation_report">
<a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluation_report">[docs]</a>
<span class="k">def</span> <span class="nf">evaluation_report</span><span class="p">(</span><span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
<span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span><span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">&#39;mae&#39;</span><span class="p">,</span>
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;auto&#39;</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates a report (a pandas&#39; DataFrame) containing information of the evaluation of the model as according</span>
<span class="sd"> to a specific protocol and in terms of one or more evaluation metrics (errors).</span>
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
<span class="sd"> in charge of generating the samples in which the model is evaluated.</span>
<span class="sd"> :param error_metrics: a string, or list of strings, representing the name(s) of an error function in `qp.error`</span>
<span class="sd"> (e.g., &#39;mae&#39;, the default value), or a callable function, or a list of callable functions, implementing</span>
<span class="sd"> the error function itself.</span>
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to &quot;force&quot; for applying it even if the number of</span>
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
<span class="sd"> in the samples to be generated. Set to True or &quot;auto&quot; (default) for letting QuaPy decide whether it is</span>
<span class="sd"> convenient or not. Set to False to deactivate.</span>
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
<span class="sd"> :return: a pandas&#39; DataFrame containing the columns &#39;true-prev&#39; (the true prevalence of each sample),</span>
<span class="sd"> &#39;estim-prev&#39; (the prevalence estimated by the model for each sample), and as many columns as error metrics</span>
<span class="sd"> have been indicated, each displaying the score in terms of that metric for every sample.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">&#39;mae&#39;</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metrics</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">error_metrics</span> <span class="o">=</span> <span class="p">[</span><span class="n">error_metrics</span><span class="p">]</span>
<span class="n">error_funcs</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_metrics</span><span class="p">]</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">&#39;__call__&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">),</span> <span class="s1">&#39;invalid error functions&#39;</span>
<span class="n">error_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">e</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">]</span>
<span class="n">row_entries</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
<span class="n">series</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;true-prev&#39;</span><span class="p">:</span> <span class="n">true_prev</span><span class="p">,</span> <span class="s1">&#39;estim-prev&#39;</span><span class="p">:</span> <span class="n">estim_prev</span><span class="p">}</span>
<span class="k">for</span> <span class="n">error_name</span><span class="p">,</span> <span class="n">error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">error_names</span><span class="p">,</span> <span class="n">error_funcs</span><span class="p">):</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">)</span>
<span class="n">series</span><span class="p">[</span><span class="n">error_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">score</span>
<span class="n">row_entries</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">series</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">row_entries</span><span class="p">)</span>
<span class="k">return</span> <span class="n">df</span>
<div class="viewcode-block" id="evaluate">
<a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate">[docs]</a>
<span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span>
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;auto&#39;</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluates a quantification model according to a specific sample generation protocol and in terms of one</span>
<span class="sd"> evaluation metric (error).</span>
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the</span>
<span class="sd"> protocol in charge of generating the samples in which the model is evaluated.</span>
<span class="sd"> :param error_metric: a string representing the name(s) of an error function in `qp.error`</span>
<span class="sd"> (e.g., &#39;mae&#39;), or a callable function implementing the error function itself.</span>
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to &quot;force&quot; for applying it even if the number of</span>
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
<span class="sd"> in the samples to be generated. Set to True or &quot;auto&quot; (default) for letting QuaPy decide whether it is</span>
<span class="sd"> convenient or not. Set to False to deactivate.</span>
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
<span class="sd"> :return: if the error metric is not averaged (e.g., &#39;ae&#39;, &#39;rae&#39;), returns an array of shape `(n_samples,)` with</span>
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., &#39;mae&#39;, &#39;mrae&#39;) then returns</span>
<span class="sd"> a single float</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metric</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">error_metric</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_metric</span><span class="p">)</span>
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
<span class="k">return</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span></div>
<div class="viewcode-block" id="evaluate_on_samples">
<a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate_on_samples">[docs]</a>
<span class="k">def</span> <span class="nf">evaluate_on_samples</span><span class="p">(</span>
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">samples</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">],</span>
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error).</span>
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
<span class="sd"> :param samples: a list of samples on which the quantifier is to be evaluated</span>
<span class="sd"> :param error_metric: a string representing the name(s) of an error function in `qp.error`</span>
<span class="sd"> (e.g., &#39;mae&#39;), or a callable function implementing the error function itself.</span>
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
<span class="sd"> :return: if the error metric is not averaged (e.g., &#39;ae&#39;, &#39;rae&#39;), returns an array of shape `(n_samples,)` with</span>
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., &#39;mae&#39;, &#39;mrae&#39;) then returns</span>
<span class="sd"> a single float</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">IterateProtocol</span><span class="p">(</span><span class="n">samples</span><span class="p">),</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,518 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.functional &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.functional</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.functional</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
<span class="kn">import</span> <span class="nn">scipy</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<div class="viewcode-block" id="prevalence_linspace">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_linspace">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.01</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Produces an array of uniformly separated values of prevalence.</span>
<span class="sd"> By default, produces an array of 21 prevalence values, with</span>
<span class="sd"> step 0.05 and with the limits smoothed, i.e.:</span>
<span class="sd"> [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]</span>
<span class="sd"> :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)</span>
<span class="sd"> :param repeats: number of times each prevalence is to be repeated (defaults to 1)</span>
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
<span class="sd"> :return: an array of uniformly separated prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="n">num</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">endpoint</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+=</span> <span class="n">smooth_limits_epsilon</span>
<span class="n">p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-=</span> <span class="n">smooth_limits_epsilon</span>
<span class="k">if</span> <span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">&gt;</span> <span class="n">p</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;the smoothing in the limits is greater than the prevalence step&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">repeats</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">repeats</span><span class="p">)</span>
<span class="k">return</span> <span class="n">p</span></div>
<div class="viewcode-block" id="prevalence_from_labels">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_labels">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence_from_labels</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computed the prevalence values from a vector of labels.</span>
<span class="sd"> :param labels: array-like of shape `(n_instances)` with the label for each instance</span>
<span class="sd"> :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when</span>
<span class="sd"> some classes have no examples.</span>
<span class="sd"> :return: an ndarray of shape `(len(classes))` with the class prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">labels</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;param labels does not seem to be a ndarray of label predictions&#39;</span><span class="p">)</span>
<span class="n">unique</span><span class="p">,</span> <span class="n">counts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">return_counts</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">by_class</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">unique</span><span class="p">,</span> <span class="n">counts</span><span class="p">)))</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">by_class</span><span class="p">[</span><span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="n">classes</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">return</span> <span class="n">prevalences</span></div>
<div class="viewcode-block" id="prevalence_from_probabilities">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_probabilities">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence_from_probabilities</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">binarize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a vector of prevalence values from a matrix of posterior probabilities.</span>
<span class="sd"> :param posteriors: array-like of shape `(n_instances, n_classes,)` with posterior probabilities for each class</span>
<span class="sd"> :param binarize: set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,</span>
<span class="sd"> converting the vectors of posterior probabilities into class indices, by taking the argmax).</span>
<span class="sd"> :return: array of shape `(n_classes,)` containing the prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;param posteriors does not seem to be a ndarray of posteior probabilities&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">binarize</span><span class="p">:</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevalence_from_labels</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">posteriors</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">return</span> <span class="n">prevalences</span></div>
<div class="viewcode-block" id="as_binary_prevalence">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.as_binary_prevalence">[docs]</a>
<span class="k">def</span> <span class="nf">as_binary_prevalence</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">],</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two</span>
<span class="sd"> values representing a binary distribution.</span>
<span class="sd"> :param positive_prevalence: prevalence for the positive class</span>
<span class="sd"> :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution</span>
<span class="sd"> is valid. If False, it then checks that the value is in the valid range, and raises an error if not.</span>
<span class="sd"> :return: np.ndarray of shape `(2,)`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">clip_if_necessary</span><span class="p">:</span>
<span class="n">positive_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">assert</span> <span class="mi">0</span> <span class="o">&lt;=</span> <span class="n">positive_prevalence</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;the value provided is not a valid prevalence for the positive class&#39;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="n">positive_prevalence</span><span class="p">])</span><span class="o">.</span><span class="n">T</span></div>
<div class="viewcode-block" id="HellingerDistance">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.HellingerDistance">[docs]</a>
<span class="k">def</span> <span class="nf">HellingerDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the Hellingher Distance (HD) between (discretized) distributions `P` and `Q`.</span>
<span class="sd"> The HD for two discrete distributions of `k` bins is defined as:</span>
<span class="sd"> .. math::</span>
<span class="sd"> HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }</span>
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
<span class="sd"> :return: float</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">P</span><span class="p">)</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">Q</span><span class="p">))</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span></div>
<div class="viewcode-block" id="TopsoeDistance">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.TopsoeDistance">[docs]</a>
<span class="k">def</span> <span class="nf">TopsoeDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-20</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Topsoe distance between two (discretized) distributions `P` and `Q`.</span>
<span class="sd"> The Topsoe distance for two discrete distributions of `k` bins is defined as:</span>
<span class="sd"> .. math::</span>
<span class="sd"> Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) +</span>
<span class="sd"> q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right)</span>
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
<span class="sd"> :return: float</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">P</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">P</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">))</span> <span class="o">+</span> <span class="n">Q</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)))</span></div>
<div class="viewcode-block" id="uniform_prevalence_sampling">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.uniform_prevalence_sampling">[docs]</a>
<span class="k">def</span> <span class="nf">uniform_prevalence_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the `Kraemer algorithm &lt;http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf&gt;`_</span>
<span class="sd"> for sampling uniformly at random from the unit simplex. This implementation is adapted from this</span>
<span class="sd"> `post &lt;https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex&gt;_`.</span>
<span class="sd"> :param n_classes: integer, number of classes (dimensionality of the simplex)</span>
<span class="sd"> :param size: number of samples to return</span>
<span class="sd"> :return: `np.ndarray` of shape `(size, n_classes,)` if `size&gt;1`, or of shape `(n_classes,)` otherwise</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">)</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">u</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span><span class="o">.</span><span class="n">T</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">u</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">_0s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
<span class="n">_1s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
<span class="n">a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">_0s</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span>
<span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">u</span><span class="p">,</span> <span class="n">_1s</span><span class="p">])</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">b</span><span class="o">-</span><span class="n">a</span>
<span class="k">if</span> <span class="n">size</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">u</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">return</span> <span class="n">u</span></div>
<span class="n">uniform_simplex_sampling</span> <span class="o">=</span> <span class="n">uniform_prevalence_sampling</span>
<div class="viewcode-block" id="strprev">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.strprev">[docs]</a>
<span class="k">def</span> <span class="nf">strprev</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a string representation for a prevalence vector. E.g.,</span>
<span class="sd"> &gt;&gt;&gt; strprev([1/3, 2/3], prec=2)</span>
<span class="sd"> &gt;&gt;&gt; &#39;[0.33, 0.67]&#39;</span>
<span class="sd"> :param prevalences: a vector of prevalence values</span>
<span class="sd"> :param prec: float precision</span>
<span class="sd"> :return: string</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="s1">&#39;[&#39;</span><span class="o">+</span> <span class="s1">&#39;, &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">p</span><span class="si">:</span><span class="s1">.</span><span class="si">{</span><span class="n">prec</span><span class="si">}</span><span class="s1">f</span><span class="si">}</span><span class="s1">&#39;</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">prevalences</span><span class="p">])</span> <span class="o">+</span> <span class="s1">&#39;]&#39;</span></div>
<div class="viewcode-block" id="adjusted_quantification">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.adjusted_quantification">[docs]</a>
<span class="k">def</span> <span class="nf">adjusted_quantification</span><span class="p">(</span><span class="n">prevalence_estim</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">clip</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the</span>
<span class="sd"> positive class `p` comes down to computing:</span>
<span class="sd"> .. math::</span>
<span class="sd"> ACC(p) = \\frac{ p - fpr }{ tpr - fpr }</span>
<span class="sd"> :param prevalence_estim: float, the estimated value for the positive class</span>
<span class="sd"> :param tpr: float, the true positive rate of the classifier</span>
<span class="sd"> :param fpr: float, the false positive rate of the classifier</span>
<span class="sd"> :param clip: set to True (default) to clip values that might exceed the range [0,1]</span>
<span class="sd"> :return: float, the adjusted count</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">den</span> <span class="o">=</span> <span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span>
<span class="k">if</span> <span class="n">den</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">den</span> <span class="o">+=</span> <span class="mf">1e-8</span>
<span class="n">adjusted</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevalence_estim</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">/</span> <span class="n">den</span>
<span class="k">if</span> <span class="n">clip</span><span class="p">:</span>
<span class="n">adjusted</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">adjusted</span><span class="p">,</span> <span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">)</span>
<span class="k">return</span> <span class="n">adjusted</span></div>
<div class="viewcode-block" id="normalize_prevalence">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.normalize_prevalence">[docs]</a>
<span class="k">def</span> <span class="nf">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in</span>
<span class="sd"> cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in</span>
<span class="sd"> cases in which all values are zero.</span>
<span class="sd"> :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values</span>
<span class="sd"> :return: a normalized vector or matrix of prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
<span class="n">accum</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">true_divide</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">accum</span><span class="p">,</span> <span class="n">where</span><span class="o">=</span><span class="n">accum</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">)</span>
<span class="n">allzeros</span> <span class="o">=</span> <span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span>
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">allzeros</span><span class="p">):</span>
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">prevalences</span><span class="p">[</span><span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevalences</span></div>
<span class="k">def</span> <span class="nf">__num_prevalence_combinations_depr</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the number of prevalence combinations in the n_classes-dimensional simplex if `nprevpoints` equally distant</span>
<span class="sd"> prevalence values are generated and `n_repeats` repetitions are requested.</span>
<span class="sd"> :param n_classes: integer, number of classes</span>
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
<span class="sd"> number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">__cache</span><span class="o">=</span><span class="p">{}</span>
<span class="k">def</span> <span class="nf">__f</span><span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">):</span>
<span class="k">if</span> <span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)</span> <span class="ow">in</span> <span class="n">__cache</span><span class="p">:</span> <span class="c1"># cached result</span>
<span class="k">return</span> <span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span>
<span class="k">if</span> <span class="n">nc</span><span class="o">==</span><span class="mi">1</span><span class="p">:</span> <span class="c1"># stop condition</span>
<span class="k">return</span> <span class="mi">1</span>
<span class="k">else</span><span class="p">:</span> <span class="c1"># recursive call</span>
<span class="n">x</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="n">__f</span><span class="p">(</span><span class="n">nc</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">-</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">np</span><span class="p">)])</span>
<span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span> <span class="o">=</span> <span class="n">x</span>
<span class="k">return</span> <span class="n">x</span>
<span class="k">return</span> <span class="n">__f</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_prevpoints</span><span class="p">)</span> <span class="o">*</span> <span class="n">n_repeats</span>
<div class="viewcode-block" id="num_prevalence_combinations">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.num_prevalence_combinations">[docs]</a>
<span class="k">def</span> <span class="nf">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally</span>
<span class="sd"> distant prevalence values are generated and `n_repeats` repetitions are requested.</span>
<span class="sd"> The computation comes down to calculating:</span>
<span class="sd"> .. math::</span>
<span class="sd"> \\binom{N+C-1}{C-1} \\times r</span>
<span class="sd"> where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of</span>
<span class="sd"> classes, and `r` is `n_repeats`. This solution comes from the</span>
<span class="sd"> `Stars and Bars &lt;https://brilliant.org/wiki/integer-equations-star-and-bars/&gt;`_ problem.</span>
<span class="sd"> :param n_classes: integer, number of classes</span>
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
<span class="sd"> number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">N</span> <span class="o">=</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
<span class="n">C</span> <span class="o">=</span> <span class="n">n_classes</span>
<span class="n">r</span> <span class="o">=</span> <span class="n">n_repeats</span>
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">scipy</span><span class="o">.</span><span class="n">special</span><span class="o">.</span><span class="n">binom</span><span class="p">(</span><span class="n">N</span> <span class="o">+</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">r</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_nprevpoints_approximation">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.get_nprevpoints_approximation">[docs]</a>
<span class="k">def</span> <span class="nf">get_nprevpoints_approximation</span><span class="p">(</span><span class="n">combinations_budget</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so</span>
<span class="sd"> that the number of valid prevalence values generated as combinations of prevalence points (points in a</span>
<span class="sd"> `n_classes`-dimensional simplex) do not exceed combinations_budget.</span>
<span class="sd"> :param combinations_budget: integer, maximum number of combinations allowed</span>
<span class="sd"> :param n_classes: integer, number of classes</span>
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
<span class="sd"> :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">n_classes</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">n_repeats</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">combinations_budget</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;parameters must be positive integers&#39;</span>
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">combinations</span> <span class="o">=</span> <span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">)</span>
<span class="k">if</span> <span class="n">combinations</span> <span class="o">&gt;</span> <span class="n">combinations_budget</span><span class="p">:</span>
<span class="k">return</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">n_prevpoints</span> <span class="o">+=</span> <span class="mi">1</span></div>
<div class="viewcode-block" id="check_prevalence_vector">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.check_prevalence_vector">[docs]</a>
<span class="k">def</span> <span class="nf">check_prevalence_vector</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">raise_exception</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">toleranze</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.</span>
<span class="sd"> :param p: the prevalence vector to check</span>
<span class="sd"> :return: True if `p` is valid, False otherwise</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o">&gt;=</span><span class="mi">0</span><span class="p">):</span>
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;the prevalence vector contains negative numbers&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o">&lt;=</span><span class="mi">1</span><span class="p">):</span>
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;the prevalence vector contains values &gt;1&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">isclose</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">sum</span><span class="p">(),</span> <span class="mi">1</span><span class="p">,</span> <span class="n">atol</span><span class="o">=</span><span class="n">toleranze</span><span class="p">):</span>
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;the prevalence vector does not sum up to 1&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span></div>
<div class="viewcode-block" id="get_divergence">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.get_divergence">[docs]</a>
<span class="k">def</span> <span class="nf">get_divergence</span><span class="p">(</span><span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">if</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">&#39;HD&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">HellingerDistance</span>
<span class="k">elif</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">&#39;topsoe&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">TopsoeDistance</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unknown divergence </span><span class="si">{</span><span class="n">divergence</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">callable</span><span class="p">(</span><span class="n">divergence</span><span class="p">):</span>
<span class="k">return</span> <span class="n">divergence</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;argument &quot;divergence&quot; not understood; use a str or a callable function&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="argmin_prevalence">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.argmin_prevalence">[docs]</a>
<span class="k">def</span> <span class="nf">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;optim_minimize&#39;</span><span class="p">):</span>
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;optim_minimize&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;linear_search&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">&#39;ternary_search&#39;</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<div class="viewcode-block" id="optim_minimize">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.optim_minimize">[docs]</a>
<span class="k">def</span> <span class="nf">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex</span>
<span class="sd"> that yields the smallest lost. This optimization is carried out by means of a constrained search using scipy&#39;s</span>
<span class="sd"> SLSQP routine.</span>
<span class="sd"> :param loss: (callable) the function to minimize</span>
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">optimize</span>
<span class="c1"># the initial point is set as the uniform distribution</span>
<span class="n">uniform_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">fill_value</span><span class="o">=</span><span class="mi">1</span> <span class="o">/</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,))</span>
<span class="c1"># solutions are bounded to those contained in the unit-simplex</span>
<span class="n">bounds</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">((</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">))</span> <span class="c1"># values in [0,1]</span>
<span class="n">constraints</span> <span class="o">=</span> <span class="p">({</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;eq&#39;</span><span class="p">,</span> <span class="s1">&#39;fun&#39;</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">x</span><span class="p">)})</span> <span class="c1"># values summing up to 1</span>
<span class="n">r</span> <span class="o">=</span> <span class="n">optimize</span><span class="o">.</span><span class="n">minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">x0</span><span class="o">=</span><span class="n">uniform_distribution</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;SLSQP&#39;</span><span class="p">,</span> <span class="n">bounds</span><span class="o">=</span><span class="n">bounds</span><span class="p">,</span> <span class="n">constraints</span><span class="o">=</span><span class="n">constraints</span><span class="p">)</span>
<span class="k">return</span> <span class="n">r</span><span class="o">.</span><span class="n">x</span></div>
<div class="viewcode-block" id="linear_search">
<a class="viewcode-back" href="../../quapy.html#quapy.functional.linear_search">[docs]</a>
<span class="k">def</span> <span class="nf">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring</span>
<span class="sd"> the range [0,1] stepping by 0.01. This search is inefficient, and is added only for completeness (some of the</span>
<span class="sd"> early methods in quantification literature used it, e.g., HDy). A most powerful alternative is `optim_minimize`.</span>
<span class="sd"> :param loss: (callable) the function to minimize</span>
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">n_classes</span><span class="o">==</span><span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;linear search is only available for binary problems&#39;</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.0</span><span class="p">):</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">loss</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">,</span> <span class="n">prev</span><span class="p">]))</span>
<span class="k">if</span> <span class="n">min_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o">&lt;</span> <span class="n">min_score</span><span class="p">:</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="n">prev</span><span class="p">,</span> <span class="n">score</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev_selected</span><span class="p">,</span> <span class="n">prev_selected</span><span class="p">])</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,503 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method._kdey &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method._kdey</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method._kdey</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
<span class="kn">from</span> <span class="nn">sklearn.neighbors</span> <span class="kn">import</span> <span class="n">KernelDensity</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeSoftQuantifier</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics.pairwise</span> <span class="kn">import</span> <span class="n">rbf_kernel</span>
<div class="viewcode-block" id="KDEBase">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase">[docs]</a>
<span class="k">class</span> <span class="nc">KDEBase</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Common ancestor for KDE-based methods. Implements some common routines.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">BANDWIDTH_METHOD</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;scott&#39;</span><span class="p">,</span> <span class="s1">&#39;silverman&#39;</span><span class="p">]</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_check_bandwidth</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks that the bandwidth parameter is correct</span>
<span class="sd"> :param bandwidth: either a string (see BANDWIDTH_METHOD) or a float</span>
<span class="sd"> :return: nothing, but raises an exception for invalid values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">bandwidth</span> <span class="ow">in</span> <span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;invalid bandwidth, valid ones are </span><span class="si">{</span><span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span><span class="si">}</span><span class="s1"> or float values&#39;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">assert</span> <span class="mi">0</span> <span class="o">&lt;</span> <span class="n">bandwidth</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;the bandwith for KDEy should be in (0,1), since this method models the unit simplex&quot;</span>
<div class="viewcode-block" id="KDEBase.get_kde_function">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_kde_function">[docs]</a>
<span class="k">def</span> <span class="nf">get_kde_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps the KDE function from scikit-learn.</span>
<span class="sd"> :param X: data for which the density function is to be estimated</span>
<span class="sd"> :param bandwidth: the bandwidth of the kernel</span>
<span class="sd"> :return: a scikit-learn&#39;s KernelDensity object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">KernelDensity</span><span class="p">(</span><span class="n">bandwidth</span><span class="o">=</span><span class="n">bandwidth</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
<div class="viewcode-block" id="KDEBase.pdf">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.pdf">[docs]</a>
<span class="k">def</span> <span class="nf">pdf</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">kde</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps the density evalution of scikit-learn&#39;s KDE. Scikit-learn returns log-scores (s), so this</span>
<span class="sd"> function returns :math:`e^{s}`</span>
<span class="sd"> :param kde: a previously fit KDE function</span>
<span class="sd"> :param X: the data for which the density is to be estimated</span>
<span class="sd"> :return: np.ndarray with the densities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kde</span><span class="o">.</span><span class="n">score_samples</span><span class="p">(</span><span class="n">X</span><span class="p">))</span></div>
<div class="viewcode-block" id="KDEBase.get_mixture_components">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_mixture_components">[docs]</a>
<span class="k">def</span> <span class="nf">get_mixture_components</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns an array containing the mixture components, i.e., the KDE functions for each class.</span>
<span class="sd"> :param X: the data containing the covariates</span>
<span class="sd"> :param y: the class labels</span>
<span class="sd"> :param n_classes: integer, the number of classes</span>
<span class="sd"> :param bandwidth: float, the bandwidth of the kernel</span>
<span class="sd"> :return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">cat</span><span class="p">],</span> <span class="n">bandwidth</span><span class="p">)</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)]</span></div>
</div>
<div class="viewcode-block" id="KDEyML">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML">[docs]</a>
<span class="k">class</span> <span class="nc">KDEyML</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as</span>
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification &lt;https://arxiv.org/abs/2401.00490&gt;`_, in which</span>
<span class="sd"> the authors show that minimizing the distribution mathing criterion for KLD is akin to performing</span>
<span class="sd"> maximum likelihood (ML).</span>
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
<span class="sd"> :math:`\\alpha` defined by</span>
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
<span class="sd"> In KDEy-ML, the divergence is taken to be the Kullback-Leibler Divergence. This is equivalent to solving:</span>
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} -</span>
<span class="sd"> \\mathbb{E}_{q_{\\widetilde{U}}} \\left[ \\log \\boldsymbol{p}_{\\alpha}(\\widetilde{x}) \\right]`</span>
<span class="sd"> which corresponds to the maximum likelihood estimate.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier.</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
<div class="viewcode-block" id="KDEyML.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="KDEyML.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood</span>
<span class="sd"> of the data (i.e., that minimizes the negative log-likelihood)</span>
<span class="sd"> :param posteriors: instances in the sample converted into posterior probabilities</span>
<span class="sd"> :return: a vector of class prevalence estimates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">)</span>
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
<span class="n">test_densities</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_i</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">neg_loglikelihood</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="n">test_mixture_likelihood</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prev_i</span> <span class="o">*</span> <span class="n">dens_i</span> <span class="k">for</span> <span class="n">prev_i</span><span class="p">,</span> <span class="n">dens_i</span> <span class="ow">in</span> <span class="nb">zip</span> <span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">test_densities</span><span class="p">))</span>
<span class="n">test_loglikelihood</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">test_mixture_likelihood</span> <span class="o">+</span> <span class="n">epsilon</span><span class="p">)</span>
<span class="k">return</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">test_loglikelihood</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">neg_loglikelihood</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="KDEyHD">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD">[docs]</a>
<span class="k">class</span> <span class="nc">KDEyHD</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Disntace (HD) as</span>
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification &lt;https://arxiv.org/abs/2401.00490&gt;`_, in which</span>
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
<span class="sd"> :math:`\\alpha` defined by</span>
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
<span class="sd"> In KDEy-HD, the divergence is taken to be the squared Hellinger Distance, an f-divergence with corresponding</span>
<span class="sd"> f-generator function given by:</span>
<span class="sd"> :math:`f(u)=(\\sqrt{u}-1)^2`</span>
<span class="sd"> The authors proposed a Monte Carlo solution that relies on importance sampling:</span>
<span class="sd"> :math:`\\hat{D}_f(p||q)= \\frac{1}{t} \\sum_{i=1}^t f\\left(\\frac{p(x_i)}{q(x_i)}\\right) \\frac{q(x_i)}{r(x_i)}`</span>
<span class="sd"> where the datapoints (trials) :math:`x_1,\\ldots,x_t\\sim_{\\mathrm{iid}} r` with :math:`r` the</span>
<span class="sd"> uniform distribution.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier.</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> :param montecarlo_trials: number of Monte Carlo trials (default 10000)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span><span class="s1">&#39;HD&#39;</span><span class="p">,</span>
<span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">montecarlo_trials</span><span class="o">=</span><span class="mi">10000</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span> <span class="o">=</span> <span class="n">montecarlo_trials</span>
<div class="viewcode-block" id="KDEyHD.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="n">N</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span>
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span>
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="n">kde_i</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">N</span><span class="o">//</span><span class="n">n</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">rs</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_j</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_j</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># equiv. to (uniform @ self.reference_classwise_densities)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="KDEyHD.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="c1"># we retain all n*N examples (sampled from a mixture with uniform parameter), and then</span>
<span class="c1"># apply importance sampling (IS). In this version we compute D(p_alpha||q) with IS</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
<span class="n">test_kde</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="n">test_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">test_kde</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">f_squared_hellinger</span><span class="p">(</span><span class="n">u</span><span class="p">):</span>
<span class="k">return</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">u</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span>
<span class="c1"># todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s1">&#39;hd&#39;</span><span class="p">:</span>
<span class="n">f</span> <span class="o">=</span> <span class="n">f_squared_hellinger</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;only squared HD is currently implemented&#39;</span><span class="p">)</span>
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
<span class="n">qs</span> <span class="o">=</span> <span class="n">test_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">+</span> <span class="n">epsilon</span>
<span class="n">iw</span> <span class="o">=</span> <span class="n">qs</span><span class="o">/</span><span class="n">rs</span> <span class="c1">#importance weights</span>
<span class="n">p_class</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
<span class="n">fracs</span> <span class="o">=</span> <span class="n">p_class</span><span class="o">/</span><span class="n">qs</span>
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="c1"># ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs</span>
<span class="n">ps_div_qs</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">@</span> <span class="n">fracs</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span> <span class="n">f</span><span class="p">(</span><span class="n">ps_div_qs</span><span class="p">)</span> <span class="o">*</span> <span class="n">iw</span> <span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="KDEyCS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS">[docs]</a>
<span class="k">class</span> <span class="nc">KDEyCS</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as</span>
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification &lt;https://arxiv.org/abs/2401.00490&gt;`_, in which</span>
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
<span class="sd"> :math:`\\alpha` defined by</span>
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
<span class="sd"> In KDEy-CS, the divergence is taken to be the Cauchy-Schwarz divergence given by:</span>
<span class="sd"> :math:`\\mathcal{D}_{\\mathrm{CS}}(p||q)=-\\log\\left(\\frac{\\int p(x)q(x)dx}{\\sqrt{\\int p(x)^2dx \\int q(x)^2dx}}\\right)`</span>
<span class="sd"> The authors showed that this distribution matching admits a closed-form solution</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier.</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="n">KDEBase</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<div class="viewcode-block" id="KDEyCS.gram_matrix_mix_sum">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.gram_matrix_mix_sum">[docs]</a>
<span class="k">def</span> <span class="nf">gram_matrix_mix_sum</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="c1"># this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))</span>
<span class="c1"># to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are </span>
<span class="c1"># two &quot;scalar matrices&quot; (h^2)*I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth)</span>
<span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span>
<span class="n">variance</span> <span class="o">=</span> <span class="mi">2</span> <span class="o">*</span> <span class="p">(</span><span class="n">h</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
<span class="n">nD</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">gamma</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="n">variance</span><span class="p">)</span>
<span class="n">norm_factor</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(((</span><span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">)</span><span class="o">**</span><span class="n">nD</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">variance</span><span class="o">**</span><span class="p">(</span><span class="n">nD</span><span class="p">)))</span>
<span class="n">gram</span> <span class="o">=</span> <span class="n">norm_factor</span> <span class="o">*</span> <span class="n">rbf_kernel</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">gamma</span><span class="o">=</span><span class="n">gamma</span><span class="p">)</span>
<span class="k">return</span> <span class="n">gram</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<div class="viewcode-block" id="KDEyCS.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n</span><span class="p">)),</span> \
<span class="s1">&#39;label name gaps not allowed in current implementation&#39;</span>
<span class="c1"># counts_inv keeps track of the relative weight of each datapoint within its class</span>
<span class="c1"># (i.e., the weight in its KDE model)</span>
<span class="n">counts_inv</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">counts</span><span class="p">())</span>
<span class="c1"># tr_tr_sums corresponds to symbol \overline{B} in the paper</span>
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n</span><span class="p">,</span><span class="n">n</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="k">if</span> <span class="n">i</span> <span class="o">&gt;</span> <span class="n">j</span><span class="p">:</span>
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">j</span><span class="p">,</span><span class="n">i</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">block</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">i</span><span class="p">],</span> <span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">j</span><span class="p">]</span> <span class="k">if</span> <span class="n">i</span><span class="o">!=</span><span class="n">j</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">block</span>
<span class="c1"># keep track of these data structures for the test phase</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span> <span class="o">=</span> <span class="n">P</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ytr</span> <span class="o">=</span> <span class="n">y</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">tr_tr_sums</span>
<span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span> <span class="o">=</span> <span class="n">counts_inv</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="KDEyCS.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="n">Ptr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span>
<span class="n">Pte</span> <span class="o">=</span> <span class="n">posteriors</span>
<span class="n">y</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ytr</span>
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span>
<span class="n">M</span><span class="p">,</span> <span class="n">nD</span> <span class="o">=</span> <span class="n">Pte</span><span class="o">.</span><span class="n">shape</span>
<span class="n">Minv</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span><span class="o">/</span><span class="n">M</span><span class="p">)</span> <span class="c1"># t in the paper</span>
<span class="n">n</span> <span class="o">=</span> <span class="n">Ptr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="c1"># becomes a constant that does not affect the optimization, no need to compute it</span>
<span class="c1"># partC = 0.5*np.log(self.gram_matrix_mix_sum(Pte) * Kinv * Kinv)</span>
<span class="c1"># tr_te_sums corresponds to \overline{a}*(1/Li)*(1/M) in the paper (note the constants</span>
<span class="c1"># are already aggregated to tr_te_sums, so these multiplications are not carried out</span>
<span class="c1"># at each iteration of the optimization phase)</span>
<span class="n">tr_te_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="n">tr_te_sums</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">Ptr</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">i</span><span class="p">],</span> <span class="n">Pte</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">alpha</span><span class="p">):</span>
<span class="c1"># called \overline{r} in the paper</span>
<span class="n">alpha_ratio</span> <span class="o">=</span> <span class="n">alpha</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span>
<span class="c1"># recal that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)</span>
<span class="n">partA</span> <span class="o">=</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_te_sums</span><span class="p">)</span> <span class="o">*</span> <span class="n">Minv</span><span class="p">)</span>
<span class="n">partB</span> <span class="o">=</span> <span class="mf">0.5</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_tr_sums</span> <span class="o">@</span> <span class="n">alpha_ratio</span><span class="p">)</span>
<span class="k">return</span> <span class="n">partA</span> <span class="o">+</span> <span class="n">partB</span> <span class="c1">#+ partC</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,549 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method._neural &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method._neural</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method._neural</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
<span class="kn">import</span> <span class="nn">random</span>
<span class="kn">import</span> <span class="nn">torch</span>
<span class="kn">from</span> <span class="nn">torch.nn</span> <span class="kn">import</span> <span class="n">MSELoss</span>
<span class="kn">from</span> <span class="nn">torch.nn.functional</span> <span class="kn">import</span> <span class="n">relu</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">UPP</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<div class="viewcode-block" id="QuaNetTrainer">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer">[docs]</a>
<span class="k">class</span> <span class="nc">QuaNetTrainer</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implementation of `QuaNet &lt;https://dl.acm.org/doi/abs/10.1145/3269206.3269287&gt;`_, a neural network for</span>
<span class="sd"> quantification. This implementation uses `PyTorch &lt;https://pytorch.org/&gt;`_ and can take advantage of GPU</span>
<span class="sd"> for speeding-up the training phase.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; import quapy as qp</span>
<span class="sd"> &gt;&gt;&gt; from quapy.method.meta import QuaNet</span>
<span class="sd"> &gt;&gt;&gt; from quapy.classification.neural import NeuralClassifierTrainer, CNNnet</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; # use samples of 100 elements</span>
<span class="sd"> &gt;&gt;&gt; qp.environ[&#39;SAMPLE_SIZE&#39;] = 100</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; # load the kindle dataset as text, and convert words to numerical indexes</span>
<span class="sd"> &gt;&gt;&gt; dataset = qp.datasets.fetch_reviews(&#39;kindle&#39;, pickle=True)</span>
<span class="sd"> &gt;&gt;&gt; qp.train.preprocessing.index(dataset, min_df=5, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; # the text classifier is a CNN trained by NeuralClassifierTrainer</span>
<span class="sd"> &gt;&gt;&gt; cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)</span>
<span class="sd"> &gt;&gt;&gt; classifier = NeuralClassifierTrainer(cnn, device=&#39;cuda&#39;)</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; # train QuaNet (QuaNet is an alias to QuaNetTrainer)</span>
<span class="sd"> &gt;&gt;&gt; model = QuaNet(classifier, qp.environ[&#39;SAMPLE_SIZE&#39;], device=&#39;cuda&#39;)</span>
<span class="sd"> &gt;&gt;&gt; model.fit(dataset.training)</span>
<span class="sd"> &gt;&gt;&gt; estim_prevalence = model.quantify(dataset.test.instances)</span>
<span class="sd"> :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data),</span>
<span class="sd"> `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and</span>
<span class="sd"> `transform` (i.e., that can generate embedded representations of the unlabelled instances).</span>
<span class="sd"> :param sample_size: integer, the sample size; default is None, meaning that the sample size should be</span>
<span class="sd"> taken from qp.environ[&quot;SAMPLE_SIZE&quot;]</span>
<span class="sd"> :param n_epochs: integer, maximum number of training epochs</span>
<span class="sd"> :param tr_iter_per_poch: integer, number of training iterations before considering an epoch complete</span>
<span class="sd"> :param va_iter_per_poch: integer, number of validation iterations to perform after each epoch</span>
<span class="sd"> :param lr: float, the learning rate</span>
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cells</span>
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
<span class="sd"> quantification embedding</span>
<span class="sd"> :param bidirectional: boolean, indicates whether the LSTM is bidirectional or not</span>
<span class="sd"> :param qdrop_p: float, dropout probability</span>
<span class="sd"> :param patience: integer, number of epochs showing no improvement in the validation set before stopping the</span>
<span class="sd"> training phase (early stopping)</span>
<span class="sd"> :param checkpointdir: string, a path where to store models&#39; checkpoints</span>
<span class="sd"> :param checkpointname: string (optional), the name of the model&#39;s checkpoint</span>
<span class="sd"> :param device: string, indicate &quot;cpu&quot; or &quot;cuda&quot;</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">classifier</span><span class="p">,</span>
<span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">n_epochs</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
<span class="n">tr_iter_per_poch</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span>
<span class="n">va_iter_per_poch</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">checkpointdir</span><span class="o">=</span><span class="s1">&#39;../checkpoint&#39;</span><span class="p">,</span>
<span class="n">checkpointname</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">device</span><span class="o">=</span><span class="s1">&#39;cuda&#39;</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">&#39;transform&#39;</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce document embeddings &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;since it does not implement the method &quot;transform&quot;&#39;</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">&#39;predict_proba&#39;</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce posterior probabilities &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;since it does not implement the method &quot;predict_proba&quot;&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span> <span class="o">=</span> <span class="n">n_epochs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span> <span class="o">=</span> <span class="n">tr_iter_per_poch</span>
<span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span> <span class="o">=</span> <span class="n">va_iter_per_poch</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lr</span> <span class="o">=</span> <span class="n">lr</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;lstm_hidden_size&#39;</span><span class="p">:</span> <span class="n">lstm_hidden_size</span><span class="p">,</span>
<span class="s1">&#39;lstm_nlayers&#39;</span><span class="p">:</span> <span class="n">lstm_nlayers</span><span class="p">,</span>
<span class="s1">&#39;ff_layers&#39;</span><span class="p">:</span> <span class="n">ff_layers</span><span class="p">,</span>
<span class="s1">&#39;bidirectional&#39;</span><span class="p">:</span> <span class="n">bidirectional</span><span class="p">,</span>
<span class="s1">&#39;qdrop_p&#39;</span><span class="p">:</span> <span class="n">qdrop_p</span>
<span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
<span class="k">if</span> <span class="n">checkpointname</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">&#39;-&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
<span class="n">checkpointname</span> <span class="o">=</span> <span class="s1">&#39;QuaNet-&#39;</span><span class="o">+</span><span class="n">random_code</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span> <span class="o">=</span> <span class="n">checkpointdir</span>
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">checkpointname</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
<div class="viewcode-block" id="QuaNetTrainer.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains QuaNet.</span>
<span class="sd"> :param data: the training data on which to train QuaNet. If `fit_classifier=True`, the data will be split in</span>
<span class="sd"> 40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If</span>
<span class="sd"> `fit_classifier=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively.</span>
<span class="sd"> :param fit_classifier: if True, trains the classifier on a split containing 40% of the data</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="n">classifier_data</span><span class="p">,</span> <span class="n">unused_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.4</span><span class="p">)</span>
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">unused_data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span> <span class="c1"># 0.66 split of 60% makes 40% and 20%</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">classifier_data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">classifier_data</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span>
<span class="c1"># estimate the hard and soft stats tpr and fpr of the classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tr_prev</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="c1"># compute the posterior probabilities of the instances</span>
<span class="n">valid_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="c1"># turn instances&#39; original representations into embeddings</span>
<span class="n">valid_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">valid_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
<span class="n">train_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">train_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;cc&#39;</span><span class="p">:</span> <span class="n">CC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="s1">&#39;acc&#39;</span><span class="p">:</span> <span class="n">ACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
<span class="s1">&#39;pcc&#39;</span><span class="p">:</span> <span class="n">PCC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="s1">&#39;pacc&#39;</span><span class="p">:</span> <span class="n">PACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">classifier_data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">[</span><span class="s1">&#39;emq&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">classifier_data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;tr-loss&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;va-loss&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;tr-mae&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;va-mae&#39;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
<span class="p">}</span>
<span class="n">nQ</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">)</span>
<span class="n">nC</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span> <span class="o">=</span> <span class="n">QuaNetModule</span><span class="p">(</span>
<span class="n">doc_embedding_size</span><span class="o">=</span><span class="n">train_data_embed</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
<span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span>
<span class="n">stats_size</span><span class="o">=</span><span class="n">nQ</span><span class="o">*</span><span class="n">nC</span><span class="p">,</span>
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span> <span class="k">if</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span>
<span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">lr</span><span class="p">)</span>
<span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span>
<span class="k">for</span> <span class="n">epoch_i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">train_data_embed</span><span class="p">,</span> <span class="n">train_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">valid_data_embed</span><span class="p">,</span> <span class="n">valid_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;va-loss&#39;</span><span class="p">],</span> <span class="n">epoch_i</span><span class="p">)</span>
<span class="k">if</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;training ended by patience exhausted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;for epoch </span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
<span class="k">break</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_get_aggregative_estims</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">):</span>
<span class="n">label_predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">quantifier</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">posteriors</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">)</span> <span class="k">else</span> <span class="n">label_predictions</span>
<span class="n">prevs_estim</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">quantifier</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">predictions</span><span class="p">))</span>
<span class="c1"># there is no real need for adding static estims like the TPR or FPR from training since those are constant</span>
<span class="k">return</span> <span class="n">prevs_estim</span>
<div class="viewcode-block" id="QuaNetTrainer.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="n">embeddings</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">prevalence</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">embeddings</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">&#39;cuda&#39;</span><span class="p">):</span>
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">return</span> <span class="n">prevalence</span></div>
<span class="k">def</span> <span class="nf">_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">iterations</span><span class="p">,</span> <span class="n">epoch</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="p">):</span>
<span class="n">mse_loss</span> <span class="o">=</span> <span class="n">MSELoss</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
<span class="n">losses</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">mae_errors</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">sampler</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span>
<span class="n">data</span><span class="p">,</span>
<span class="n">sample_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span>
<span class="n">repeats</span><span class="o">=</span><span class="n">iterations</span><span class="p">,</span>
<span class="n">random_state</span><span class="o">=</span><span class="kc">None</span> <span class="k">if</span> <span class="n">train</span> <span class="k">else</span> <span class="mi">0</span> <span class="c1"># different samples during train, same samples during validation</span>
<span class="p">)</span>
<span class="n">pbar</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">sampler</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">sampler</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
<span class="k">for</span> <span class="n">it</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pbar</span><span class="p">):</span>
<span class="n">sample_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">sample_posteriors</span> <span class="o">=</span> <span class="n">posteriors</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">sample_posteriors</span><span class="p">)</span>
<span class="n">ptrue</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">([</span><span class="n">sample_data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
<span class="n">mae_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
<span class="n">mse</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">mae_errors</span><span class="p">)</span>
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;tr-loss&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;tr-mae&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;va-loss&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">&#39;va-mae&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[QuaNet] &#39;</span>
<span class="sa">f</span><span class="s1">&#39;epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> [it=</span><span class="si">{</span><span class="n">it</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">iterations</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1">&#39;</span>
<span class="sa">f</span><span class="s1">&#39;tr-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;tr-loss&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> tr-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;tr-mae&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="se">\t</span><span class="s1">&#39;</span>
<span class="sa">f</span><span class="s1">&#39;val-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;va-loss&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> val-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">&quot;va-mae&quot;</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> &#39;</span>
<span class="sa">f</span><span class="s1">&#39;patience=</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="QuaNetTrainer.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="n">classifier_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
<span class="n">classifier_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__&#39;</span><span class="o">+</span><span class="n">k</span><span class="p">:</span><span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span><span class="n">v</span> <span class="ow">in</span> <span class="n">classifier_params</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="n">classifier_params</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">}</span></div>
<div class="viewcode-block" id="QuaNetTrainer.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
<span class="n">learner_params</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">parameters</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
<span class="k">elif</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;classifier__&#39;</span><span class="p">):</span>
<span class="n">learner_params</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;classifier__&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">)]</span> <span class="o">=</span> <span class="n">val</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;unknown parameter &#39;</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">learner_params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quanet_params</span><span class="p">,</span> <span class="n">learner_params</span><span class="p">):</span>
<span class="n">quanet_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">quanet_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">learner_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">learner_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">intersection</span> <span class="o">=</span> <span class="n">quanet_keys</span><span class="o">.</span><span class="n">intersection</span><span class="p">(</span><span class="n">learner_keys</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">intersection</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;the use of parameters </span><span class="si">{</span><span class="n">intersection</span><span class="si">}</span><span class="s1"> is ambiguous sine those can refer to &#39;</span>
<span class="sa">f</span><span class="s1">&#39;the parameters of QuaNet or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint">[docs]</a>
<span class="k">def</span> <span class="nf">clean_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Removes the checkpoint</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span><span class="p">)</span></div>
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint_dir">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir">[docs]</a>
<span class="k">def</span> <span class="nf">clean_checkpoint_dir</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Removes anything contained in the checkpoint directory</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">shutil</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span></div>
<div class="viewcode-block" id="mae_loss">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.mae_loss">[docs]</a>
<span class="k">def</span> <span class="nf">mae_loss</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Torch-like wrapper for the Mean Absolute Error</span>
<span class="sd"> :param output: predictions</span>
<span class="sd"> :param target: ground truth values</span>
<span class="sd"> :return: mean absolute error loss</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">output</span> <span class="o">-</span> <span class="n">target</span><span class="p">))</span></div>
<div class="viewcode-block" id="QuaNetModule">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule">[docs]</a>
<span class="k">class</span> <span class="nc">QuaNetModule</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the `QuaNet &lt;https://dl.acm.org/doi/abs/10.1145/3269206.3269287&gt;`_ forward pass.</span>
<span class="sd"> See :class:`QuaNetTrainer` for training QuaNet.</span>
<span class="sd"> :param doc_embedding_size: integer, the dimensionality of the document embeddings</span>
<span class="sd"> :param n_classes: integer, number of classes</span>
<span class="sd"> :param stats_size: integer, number of statistics estimated by simple quantification methods</span>
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cell</span>
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
<span class="sd"> quantification embedding</span>
<span class="sd"> :param bidirectional: boolean, whether or not to use bidirectional LSTM</span>
<span class="sd"> :param qdrop_p: float, dropout probability</span>
<span class="sd"> :param order_by: integer, class for which the document embeddings are to be sorted</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">doc_embedding_size</span><span class="p">,</span>
<span class="n">n_classes</span><span class="p">,</span>
<span class="n">stats_size</span><span class="p">,</span>
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
<span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="o">=</span> <span class="n">order_by</span>
<span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">=</span> <span class="n">lstm_hidden_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">=</span> <span class="n">lstm_nlayers</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="o">=</span> <span class="n">bidirectional</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span> <span class="o">=</span> <span class="n">qdrop_p</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">doc_embedding_size</span> <span class="o">+</span> <span class="n">n_classes</span><span class="p">,</span> <span class="c1"># +n_classes stands for the posterior probs. (concatenated)</span>
<span class="n">lstm_hidden_size</span><span class="p">,</span> <span class="n">lstm_nlayers</span><span class="p">,</span> <span class="n">bidirectional</span><span class="o">=</span><span class="n">bidirectional</span><span class="p">,</span>
<span class="n">dropout</span><span class="o">=</span><span class="n">qdrop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span><span class="p">)</span>
<span class="n">lstm_output_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span>
<span class="n">ff_input_size</span> <span class="o">=</span> <span class="n">lstm_output_size</span> <span class="o">+</span> <span class="n">stats_size</span>
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">ff_input_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">()</span>
<span class="k">for</span> <span class="n">lin_size</span> <span class="ow">in</span> <span class="n">ff_layers</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">lin_size</span><span class="p">))</span>
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">lin_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">&#39;cuda&#39;</span><span class="p">)</span> <span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span> <span class="k">else</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">&#39;cpu&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">directions</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
<div class="viewcode-block" id="QuaNetModule.forward">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule.forward">[docs]</a>
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">statistics</span><span class="p">):</span>
<span class="n">device</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span>
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
<span class="n">statistics</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">statistics</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">order</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span><span class="p">])</span>
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">doc_embeddings</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">doc_posteriors</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="c1"># the entire set represents only one instance in quapy contexts, and so the batch_size=1</span>
<span class="c1"># the shape should be (1, number-of-instances, embedding-size + n_classes)</span>
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">embeded_posteriors</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">flatten_parameters</span><span class="p">()</span>
<span class="n">_</span><span class="p">,</span> <span class="p">(</span><span class="n">rnn_hidden</span><span class="p">,</span><span class="n">_</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embeded_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_init_hidden</span><span class="p">())</span>
<span class="n">rnn_hidden</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">quant_embedding</span><span class="p">,</span> <span class="n">statistics</span><span class="p">))</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="n">quant_embedding</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="k">for</span> <span class="n">linear</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="p">:</span>
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">relu</span><span class="p">(</span><span class="n">linear</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)))</span>
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevalence</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,417 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method._threshold_optim &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method._threshold_optim</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method._threshold_optim</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">abstractmethod</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BinaryAggregativeQuantifier</span>
<div class="viewcode-block" id="ThresholdOptimization">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization">[docs]</a>
<span class="k">class</span> <span class="nc">ThresholdOptimization</span><span class="p">(</span><span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_.</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> The different variants are based on different heuristics for choosing a decision threshold</span>
<span class="sd"> that would allow for more true positives and many more false positives, on the grounds this</span>
<span class="sd"> would deliver larger denominators.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="ThresholdOptimization.condition">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.condition">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the criterion according to which the threshold should be selected.</span>
<span class="sd"> This function should return the (float) score to be minimized.</span>
<span class="sd"> :param tpr: float, true positive rate</span>
<span class="sd"> :param fpr: float, false positive rate</span>
<span class="sd"> :return: float, a score for the given `tpr` and `fpr`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<div class="viewcode-block" id="ThresholdOptimization.discard">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.discard">[docs]</a>
<span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates whether a combination of tpr and fpr should be discarded</span>
<span class="sd"> :param tpr: float, true positive rate</span>
<span class="sd"> :param fpr: float, false positive rate</span>
<span class="sd"> :return: true if the combination is to be discarded, false otherwise</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span></div>
<span class="k">def</span> <span class="nf">_eval_candidate_thresholds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Seeks for the best `tpr` and `fpr` according to the score obtained at different</span>
<span class="sd"> decision thresholds. The scoring function is implemented in function `_condition`.</span>
<span class="sd"> :param decision_scores: array-like with the classification scores</span>
<span class="sd"> :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation)</span>
<span class="sd"> :return: best `tpr` and `fpr` and `threshold` according to `_condition`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">candidate_thresholds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">)</span>
<span class="n">candidates</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">candidate_threshold</span> <span class="ow">in</span> <span class="n">candidate_thresholds</span><span class="p">:</span>
<span class="n">y_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="mi">1</span> <span class="o">*</span> <span class="p">(</span><span class="n">decision_scores</span> <span class="o">&gt;=</span> <span class="n">candidate_threshold</span><span class="p">)]</span>
<span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_table</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">)</span>
<span class="n">tpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_tpr</span><span class="p">(</span><span class="n">TP</span><span class="p">,</span> <span class="n">FN</span><span class="p">)</span>
<span class="n">fpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_fpr</span><span class="p">(</span><span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">discard</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">):</span>
<span class="n">candidate_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">condition</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span>
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">candidate_threshold</span><span class="p">])</span>
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">candidate_score</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard</span>
<span class="c1"># classify &amp; count; this is akin to assign tpr=1, fpr=0, threshold=0</span>
<span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span>
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span><span class="p">])</span>
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">candidates</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span>
<span class="n">candidates</span> <span class="o">=</span> <span class="n">candidates</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)]</span> <span class="c1"># sort candidates by candidate_score</span>
<span class="k">return</span> <span class="n">candidates</span>
<div class="viewcode-block" id="ThresholdOptimization.aggregate_with_threshold">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate_with_threshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">,</span> <span class="n">tprs</span><span class="p">,</span> <span class="n">fprs</span><span class="p">,</span> <span class="n">thresholds</span><span class="p">):</span>
<span class="c1"># This function performs the adjusted count for given tpr, fpr, and threshold.</span>
<span class="c1"># Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length &gt; 1</span>
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">[:,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">&gt;=</span> <span class="n">thresholds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevs_estims</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">tprs</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span>
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">prevs_estims</span><span class="p">,</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevs_estims</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_compute_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
<span class="n">TP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="n">FP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="n">FN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="n">TN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">return</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span>
<span class="k">def</span> <span class="nf">_compute_tpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">):</span>
<span class="k">if</span> <span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">TP</span> <span class="o">/</span> <span class="p">(</span><span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_compute_fpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">):</span>
<span class="k">if</span> <span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">0</span>
<span class="k">return</span> <span class="n">FP</span> <span class="o">/</span> <span class="p">(</span><span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span><span class="p">)</span>
<div class="viewcode-block" id="ThresholdOptimization.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="c1"># the standard behavior is to keep the best threshold only</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="ThresholdOptimization.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="c1"># the standard behavior is to compute the adjusted count using the best threshold found</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="T50">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50">[docs]</a>
<span class="k">class</span> <span class="nc">T50</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_ that looks</span>
<span class="sd"> for the threshold that makes `tpr` closest to 0.5.</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
<div class="viewcode-block" id="T50.condition">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50.condition">[docs]</a>
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="mf">0.5</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="MAX">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX">[docs]</a>
<span class="k">class</span> <span class="nc">MAX</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_ that looks</span>
<span class="sd"> for the threshold that maximizes `tpr-fpr`.</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
<div class="viewcode-block" id="MAX.condition">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX.condition">[docs]</a>
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="c1"># MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">fpr</span> <span class="o">-</span> <span class="n">tpr</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="X">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X">[docs]</a>
<span class="k">class</span> <span class="nc">X</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_ that looks</span>
<span class="sd"> for the threshold that yields `tpr=1-fpr`.</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
<div class="viewcode-block" id="X.condition">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X.condition">[docs]</a>
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">+</span> <span class="n">fpr</span><span class="p">))</span></div>
</div>
<div class="viewcode-block" id="MS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS">[docs]</a>
<span class="k">class</span> <span class="nc">MS</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Median Sweep. Threshold Optimization variant for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_ that generates</span>
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of them all.</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
<div class="viewcode-block" id="MS.condition">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.condition">[docs]</a>
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">1</span></div>
<div class="viewcode-block" id="MS.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="c1"># keeps all candidates</span>
<span class="n">tprs_fprs_thresholds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">fprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="MS.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span><span class="p">)</span>
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span><span class="o">==</span><span class="mi">2</span><span class="p">:</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevalences</span></div>
</div>
<div class="viewcode-block" id="MS2">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2">[docs]</a>
<span class="k">class</span> <span class="nc">MS2</span><span class="p">(</span><span class="n">MS</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Median Sweep 2. Threshold Optimization variant for :class:`ACC` as proposed by</span>
<span class="sd"> `Forman 2006 &lt;https://dl.acm.org/doi/abs/10.1145/1150402.1150423&gt;`_ and</span>
<span class="sd"> `Forman 2008 &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_ that generates</span>
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of for cases in</span>
<span class="sd"> which `tpr-fpr&gt;0.25`</span>
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
<span class="sd"> misclassification rates are to be estimated.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
<div class="viewcode-block" id="MS2.discard">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2.discard">[docs]</a>
<span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span><span class="o">-</span><span class="n">fpr</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mf">0.25</span></div>
</div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@ -1,238 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method.base &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method.base</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method.base</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="c1"># Base Quantifier abstract class</span>
<span class="c1"># ------------------------------------</span>
<div class="viewcode-block" id="BaseQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">BaseQuantifier</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract Quantifier. A quantifier is defined as an object of a class that implements the method :meth:`fit` on</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, the method :meth:`quantify`, and the :meth:`set_params` and</span>
<span class="sd"> :meth:`get_params` for model selection (see :meth:`quapy.model_selection.GridSearchQ`)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="BaseQuantifier.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.fit">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains a quantifier.</span>
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<div class="viewcode-block" id="BaseQuantifier.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.quantify">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate class prevalence estimates for the sample&#39;s instances</span>
<span class="sd"> :param instances: array-like</span>
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
</div>
<div class="viewcode-block" id="BinaryQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BinaryQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">BinaryQuantifier</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes</span>
<span class="sd"> (typically, to be interpreted as one class and its complement).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">_check_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">quantifier_name</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> works only on problems of binary classification. &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;Use the class OneVsAll to enable </span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> work on single-label data.&#39;</span></div>
<div class="viewcode-block" id="OneVsAll">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAll">[docs]</a>
<span class="k">class</span> <span class="nc">OneVsAll</span><span class="p">:</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="newOneVsAll">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.newOneVsAll">[docs]</a>
<span class="k">def</span> <span class="nf">newOneVsAll</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier&#39;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">OneVsAllGeneric</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span></div>
<div class="viewcode-block" id="OneVsAllGeneric">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric">[docs]</a>
<span class="k">class</span> <span class="nc">OneVsAllGeneric</span><span class="p">(</span><span class="n">OneVsAll</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary</span>
<span class="sd"> quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier&#39;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[warning] the quantifier seems to be an instance of qp.method.aggregative.AggregativeQuantifier; &#39;</span>
<span class="sa">f</span><span class="s1">&#39;you might prefer instantiating </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span> <span class="o">=</span> <span class="n">binary_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="OneVsAllGeneric.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> expect non-binary data&#39;</span>
<span class="k">assert</span> <span class="n">fit_classifier</span> <span class="o">==</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">&#39;fit_classifier must be True&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span> <span class="o">=</span> <span class="p">{</span><span class="n">c</span><span class="p">:</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_fit</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
<span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span><span class="p">)(</span>
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">c</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span>
<span class="p">)</span>
<span class="p">)</span>
<div class="viewcode-block" id="OneVsAllGeneric.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_predict</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="k">def</span> <span class="nf">_delayed_binary_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">X</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">_delayed_binary_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
<span class="n">bindata</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">c</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">bindata</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,861 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method.meta &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method.meta</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method.meta</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span><span class="p">,</span> <span class="n">make_scorer</span><span class="p">,</span> <span class="n">accuracy_score</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchCV</span><span class="p">,</span> <span class="n">cross_val_predict</span>
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">functional</span> <span class="k">as</span> <span class="n">F</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">CC</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_neural</span>
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
<span class="n">_neural</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">_neural</span><span class="p">:</span>
<span class="n">QuaNet</span> <span class="o">=</span> <span class="n">_neural</span><span class="o">.</span><span class="n">QuaNetTrainer</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">QuaNet</span> <span class="o">=</span> <span class="s2">&quot;QuaNet is not available due to missing torch package&quot;</span>
<div class="viewcode-block" id="MedianEstimator2">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2">[docs]</a>
<span class="k">class</span> <span class="nc">MedianEstimator2</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
<span class="sd"> i.e., in cases of binary quantification.</span>
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
<span class="sd"> :param n_jobs: number of parllel workes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="MedianEstimator2.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
<div class="viewcode-block" id="MedianEstimator2.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<div class="viewcode-block" id="MedianEstimator2.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<div class="viewcode-block" id="MedianEstimator2.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
<span class="p">)</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="MedianEstimator">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator">[docs]</a>
<span class="k">class</span> <span class="nc">MedianEstimator</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
<span class="sd"> i.e., in cases of binary quantification.</span>
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
<span class="sd"> :param n_jobs: number of parllel workes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="MedianEstimator.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
<div class="viewcode-block" id="MedianEstimator.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">def</span> <span class="nf">_delayed_fit_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">cls_params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_fit_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">),</span> <span class="n">q_params</span><span class="p">),</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<div class="viewcode-block" id="MedianEstimator.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_classifier</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">cls_configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_configs</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="p">[(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_aggregation</span><span class="p">,</span>
<span class="p">((</span><span class="n">setup</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">setup</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">models_preds</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<div class="viewcode-block" id="MedianEstimator.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
<span class="p">)</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="Ensemble">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble">[docs]</a>
<span class="k">class</span> <span class="nc">Ensemble</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="n">VALID_POLICIES</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ave&#39;</span><span class="p">,</span> <span class="s1">&#39;ptr&#39;</span><span class="p">,</span> <span class="s1">&#39;ds&#39;</span><span class="p">}</span> <span class="o">|</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implementation of the Ensemble methods for quantification described by </span>
<span class="sd"> `Pérez-Gállego et al., 2017 &lt;https://www.sciencedirect.com/science/article/pii/S1566253516300628&gt;`_</span>
<span class="sd"> and</span>
<span class="sd"> `Pérez-Gállego et al., 2019 &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> The policies implemented include:</span>
<span class="sd"> </span>
<span class="sd"> - Average (`policy=&#39;ave&#39;`): computes class prevalence estimates as the average of the estimates </span>
<span class="sd"> returned by the base quantifiers.</span>
<span class="sd"> - Training Prevalence (`policy=&#39;ptr&#39;`): applies a dynamic selection to the ensembles members by retaining only </span>
<span class="sd"> those members such that the class prevalence values in the samples they use as training set are closest to </span>
<span class="sd"> preliminary class prevalence estimates computed as the average of the estimates of all the members. The final </span>
<span class="sd"> estimate is recomputed by considering only the selected members.</span>
<span class="sd"> - Distribution Similarity (`policy=&#39;ds&#39;`): performs a dynamic selection of base members by retaining</span>
<span class="sd"> the members trained on samples whose distribution of posterior probabilities is closest, in terms of the</span>
<span class="sd"> Hellinger Distance, to the distribution of posterior probabilities in the test sample</span>
<span class="sd"> - Accuracy (`policy=&#39;&lt;valid error name&gt;&#39;`): performs a static selection of the ensemble members by</span>
<span class="sd"> retaining those that minimize a quantification error measure, which is passed as an argument.</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> </span>
<span class="sd"> &gt;&gt;&gt; model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy=&#39;ave&#39;, n_jobs=-1)</span>
<span class="sd"> </span>
<span class="sd"> :param quantifier: base quantification member of the ensemble </span>
<span class="sd"> :param size: number of members</span>
<span class="sd"> :param red_size: number of members to retain after selection (depending on the policy)</span>
<span class="sd"> :param min_pos: minimum number of positive instances to consider a sample as valid </span>
<span class="sd"> :param policy: the selection policy; available policies include: `ave` (default), `ptr`, `ds`, and accuracy </span>
<span class="sd"> (which is instantiated via a valid error name, e.g., `mae`)</span>
<span class="sd"> :param max_sample_size: maximum number of instances to consider in the samples (set to None </span>
<span class="sd"> to indicate no limit, default)</span>
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
<span class="sd"> validation split, or a :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> :param n_jobs: number of parallel workers (default 1)</span>
<span class="sd"> :param verbose: set to True (default is False) to get some information in standard output</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">size</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
<span class="n">red_size</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span>
<span class="n">min_pos</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
<span class="n">policy</span><span class="o">=</span><span class="s1">&#39;ave&#39;</span><span class="p">,</span>
<span class="n">max_sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">val_split</span><span class="p">:</span><span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">policy</span> <span class="ow">in</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;unknown policy=</span><span class="si">{</span><span class="n">policy</span><span class="si">}</span><span class="s1">; valid are </span><span class="si">{</span><span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">assert</span> <span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_sample_size</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> \
<span class="s1">&#39;wrong value for max_sample_size; set it to a positive number or None&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">=</span> <span class="n">min_pos</span>
<span class="bp">self</span><span class="o">.</span><span class="n">red_size</span> <span class="o">=</span> <span class="n">red_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">=</span> <span class="n">policy</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
<span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="o">=</span> <span class="n">max_sample_size</span>
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[Ensemble]&#39;</span> <span class="o">+</span> <span class="n">msg</span><span class="p">)</span>
<div class="viewcode-block" id="Ensemble.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">val_split</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">&#39;ds&#39;</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;ds policy is only defined for binary quantification, but this dataset is not binary&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">val_split</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
<span class="c1"># randomly chooses the prevalences for each member of the ensemble (preventing classes with less than</span>
<span class="c1"># min_pos positive examples)</span>
<span class="n">sample_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">min_val</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">/</span> <span class="n">sample_size</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)]</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">&#39;ds&#39;</span><span class="p">:</span>
<span class="c1"># precompute the training posterior probabilities</span>
<span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy_get_posteriors</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">is_static_policy</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">)</span>
<span class="n">args</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">is_static_policy</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span><span class="p">)</span>
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevs</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="n">_delayed_new_instance</span><span class="p">,</span>
<span class="n">tqdm</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="s1">&#39;fitting ensamble&#39;</span><span class="p">,</span> <span class="n">total</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="k">else</span> <span class="n">args</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="c1"># static selection policy (the name of a quantification-oriented error function to minimize)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_accuracy_policy</span><span class="p">(</span><span class="n">error_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="s1">&#39;Fit [Done]&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="Ensemble.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span><span class="n">_delayed_quantify</span><span class="p">,</span> <span class="p">((</span><span class="n">Qi</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">Qi</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">&#39;ptr&#39;</span><span class="p">:</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ptr_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">&#39;ds&#39;</span><span class="p">:</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span></div>
<div class="viewcode-block" id="Ensemble.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
<span class="sd"> with the abstract class).</span>
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
<span class="sd"> classification (not recommended).</span>
<span class="sd"> :param parameters: dictionary</span>
<span class="sd"> :return: raises an Exception</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> should not be used within GridSearchQ; &#39;</span>
<span class="sa">f</span><span class="s1">&#39;instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), &#39;</span>
<span class="sa">f</span><span class="s1">&#39;or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier &#39;</span>
<span class="sa">f</span><span class="s1">&#39;l optimized for classification (not recommended).&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="Ensemble.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
<span class="sd"> with the abstract class).</span>
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
<span class="sd"> classification (not recommended).</span>
<span class="sd"> :param deep: for compatibility with scikit-learn</span>
<span class="sd"> :return: raises an Exception</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_accuracy_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error_name</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Selects the red_size best performant quantifiers in a static way (i.e., dropping all non-selected instances).</span>
<span class="sd"> For each model in the ensemble, the performance is measured in terms of _error_name_ on the quantification of</span>
<span class="sd"> the samples used for training the rest of the models in the ensemble.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">quapy.evaluation</span> <span class="kn">import</span> <span class="n">evaluate_on_samples</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_name</span><span class="p">)</span>
<span class="n">tests</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">model</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">):</span>
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">evaluate_on_samples</span><span class="p">(</span><span class="n">model</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">tests</span><span class="p">[:</span><span class="n">i</span><span class="p">]</span> <span class="o">+</span> <span class="n">tests</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">error</span><span class="p">))</span>
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">_select_k</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_ptr_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Selects the predictions made by models that have been trained on samples with a prevalence that is most similar</span>
<span class="sd"> to a first approximation of the test prevalence as made by all models in the ensemble.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">test_prev_estim</span> <span class="o">=</span> <span class="n">predictions</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">tr_prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
<span class="n">ptr_differences</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mse</span><span class="p">(</span><span class="n">ptr_i</span><span class="p">,</span> <span class="n">test_prev_estim</span><span class="p">)</span> <span class="k">for</span> <span class="n">ptr_i</span> <span class="ow">in</span> <span class="n">tr_prevs</span><span class="p">]</span>
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">ptr_differences</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_ds_policy_get_posteriors</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> In the original article, there are some aspects regarding this method that are not mentioned. The paper says</span>
<span class="sd"> that the distribution of posterior probabilities from training and test examples is compared by means of the</span>
<span class="sd"> Hellinger Distance. However, how these posterior probabilities are generated is not specified. In the article,</span>
<span class="sd"> a Logistic Regressor (LR) is used as the classifier device and that could be used for this purpose. However, in</span>
<span class="sd"> general, a Quantifier is not necessarily an instance of Aggreggative Probabilistic Quantifiers, and so, that the</span>
<span class="sd"> quantifier builds on top of a probabilistic classifier cannot be given for granted. Additionally, it would not</span>
<span class="sd"> be correct to generate the posterior probabilities for training instances that have concurred in training the</span>
<span class="sd"> classifier that generates them.</span>
<span class="sd"> This function thus generates the posterior probabilities for all training documents in a cross-validation way,</span>
<span class="sd"> using LR with hyperparameters that have previously been optimized via grid search in 5FCV.</span>
<span class="sd"> :param data: a LabelledCollection</span>
<span class="sd"> :return: (P,f,) where P is an ndarray containing the posterior probabilities of the training data, generated via</span>
<span class="sd"> cross-validation and using an optimized LR, and the function to be used in order to generate posterior</span>
<span class="sd"> probabilities for test instances.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">lr_base</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">class_weight</span><span class="o">=</span><span class="s1">&#39;balanced&#39;</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">1000</span><span class="p">)</span>
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">9</span><span class="p">)}</span>
<span class="n">optim</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">lr_base</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span><span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;predict_proba&#39;</span><span class="p">)</span>
<span class="n">posteriors_generator</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="o">.</span><span class="n">predict_proba</span>
<span class="k">return</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">posteriors_generator</span>
<span class="k">def</span> <span class="nf">_ds_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
<span class="n">test_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
<span class="n">test_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">test_posteriors</span><span class="p">)</span>
<span class="n">tr_distributions</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
<span class="n">dist</span> <span class="o">=</span> <span class="p">[</span><span class="n">F</span><span class="o">.</span><span class="n">HellingerDistance</span><span class="p">(</span><span class="n">tr_dist_i</span><span class="p">,</span> <span class="n">test_distribution</span><span class="p">)</span> <span class="k">for</span> <span class="n">tr_dist_i</span> <span class="ow">in</span> <span class="n">tr_distributions</span><span class="p">]</span>
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">dist</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates that the quantifier is not aggregative.</span>
<span class="sd"> :return: False</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates that the quantifier is not probabilistic.</span>
<span class="sd"> :return: False</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">False</span></div>
<div class="viewcode-block" id="get_probability_distribution">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.get_probability_distribution">[docs]</a>
<span class="k">def</span> <span class="nf">get_probability_distribution</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">8</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets a histogram out of the posterior probabilities (only for the binary case).</span>
<span class="sd"> :param posterior_probabilities: array-like of shape `(n_instances, 2,)`</span>
<span class="sd"> :param bins: integer</span>
<span class="sd"> :return: `np.ndarray` with the relative frequencies for each bin (for the positive class only)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">posterior_probabilities</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;the posterior probabilities do not seem to be for a binary problem&#39;</span>
<span class="n">posterior_probabilities</span> <span class="o">=</span> <span class="n">posterior_probabilities</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span> <span class="c1"># take the positive posteriors only</span>
<span class="n">distribution</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="n">distribution</span></div>
<span class="k">def</span> <span class="nf">_select_k</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="p">):</span>
<span class="k">return</span> <span class="p">[</span><span class="n">elements</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span> <span class="k">for</span> <span class="n">idx</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[:</span><span class="n">k</span><span class="p">]]</span>
<span class="k">def</span> <span class="nf">_delayed_new_instance</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
<span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">keep_samples</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">fit-start for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">, sample_size=</span><span class="si">{</span><span class="n">sample_size</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val_split</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">assert</span> <span class="mi">0</span> <span class="o">&lt;</span> <span class="n">val_split</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;val_split should be in (0,1)&#39;</span>
<span class="n">data</span><span class="p">,</span> <span class="n">val_split</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mi">1</span> <span class="o">-</span> <span class="n">val_split</span><span class="p">)</span>
<span class="n">sample_index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prev</span><span class="p">)</span>
<span class="n">sample</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">sample_index</span><span class="p">)</span>
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">val_split</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">)</span>
<span class="n">tr_prevalence</span> <span class="o">=</span> <span class="n">sample</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">tr_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">posteriors</span><span class="p">[</span><span class="n">sample_index</span><span class="p">])</span> <span class="k">if</span> <span class="p">(</span><span class="n">posteriors</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">\--fit-ended for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">tr_prevalence</span><span class="p">,</span> <span class="n">tr_distribution</span><span class="p">,</span> <span class="n">sample</span> <span class="k">if</span> <span class="n">keep_samples</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_quantify</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
<span class="n">quantifier</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">return</span> <span class="n">quantifier</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="p">,</span> <span class="n">min_val</span><span class="p">,</span> <span class="n">max_trials</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a uniform sampling from the ndim-dimensional simplex but guarantees that all dimensions</span>
<span class="sd"> are &gt;= min_class_prev (for min_val&gt;0, this makes the sampling not truly uniform)</span>
<span class="sd"> :param ndim: number of dimensions of the simplex</span>
<span class="sd"> :param min_val: minimum class prevalence allowed. If less than 1/ndim a ValueError will be throw since</span>
<span class="sd"> there is no possible solution.</span>
<span class="sd"> :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex</span>
<span class="sd"> and R is the simplex subset containing dimensions lower than min_val</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">min_val</span> <span class="o">&gt;=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">ndim</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;no sample can be draw from the </span><span class="si">{</span><span class="n">ndim</span><span class="si">}</span><span class="s1">-dimensional simplex so that &#39;</span>
<span class="sa">f</span><span class="s1">&#39;all its values are &gt;=</span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> (try with a larger value for min_pos)&#39;</span><span class="p">)</span>
<span class="n">trials</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">u</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">ndim</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">all</span><span class="p">(</span><span class="n">u</span> <span class="o">&gt;=</span> <span class="n">min_val</span><span class="p">):</span>
<span class="k">return</span> <span class="n">u</span>
<span class="n">trials</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">trials</span> <span class="o">&gt;=</span> <span class="n">max_trials</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;it looks like finding a random simplex with all its dimensions being&#39;</span>
<span class="sa">f</span><span class="s1">&#39;&gt;= </span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> is unlikely (it failed after </span><span class="si">{</span><span class="n">max_trials</span><span class="si">}</span><span class="s1"> trials)&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">optim</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
<span class="k">if</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">f1e</span><span class="p">:</span>
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">f1_score</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">acce</span><span class="p">:</span>
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">accuracy_score</span><span class="p">)</span>
<span class="n">classifier</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">scoring</span><span class="o">=</span><span class="n">scoring</span><span class="p">)</span>
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">),</span>
<span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span>
<span class="o">**</span><span class="n">param_model_sel</span><span class="p">,</span>
<span class="n">error</span><span class="o">=</span><span class="n">optim</span><span class="p">)</span>
<span class="k">return</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_check_error</span><span class="p">(</span><span class="n">error</span><span class="p">):</span>
<span class="k">if</span> <span class="n">error</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span> <span class="ow">or</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
<span class="k">return</span> <span class="n">error</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">&#39;</span>
<span class="sa">f</span><span class="s1">&#39;the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ERROR_NAMES</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="ensembleFactory">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ensembleFactory">[docs]</a>
<span class="k">def</span> <span class="nf">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model</span>
<span class="sd"> selection for quantification) for a given evaluation metric using :class:`quapy.model_selection.GridSearchQ`.</span>
<span class="sd"> If the evaluation metric is classification-oriented</span>
<span class="sd"> (instead of quantification-oriented), then the optimization will be carried out via sklearn&#39;s</span>
<span class="sd"> `GridSearchCV &lt;https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html&gt;`_.</span>
<span class="sd"> Example to instantiate an :class:`Ensemble` based on :class:`quapy.method.aggregative.PACC`</span>
<span class="sd"> in which the base members are optimized for :meth:`quapy.error.mae` via</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`. The ensemble follows the policy `Accuracy` based</span>
<span class="sd"> on :meth:`quapy.error.mae` (the same measure being optimized),</span>
<span class="sd"> meaning that a static selection of members of the ensemble is made based on their performance</span>
<span class="sd"> in terms of this error.</span>
<span class="sd"> &gt;&gt;&gt; param_grid = {</span>
<span class="sd"> &gt;&gt;&gt; &#39;C&#39;: np.logspace(-3,3,7),</span>
<span class="sd"> &gt;&gt;&gt; &#39;class_weight&#39;: [&#39;balanced&#39;, None]</span>
<span class="sd"> &gt;&gt;&gt; }</span>
<span class="sd"> &gt;&gt;&gt; param_mod_sel = {</span>
<span class="sd"> &gt;&gt;&gt; &#39;sample_size&#39;: 500,</span>
<span class="sd"> &gt;&gt;&gt; &#39;protocol&#39;: &#39;app&#39;</span>
<span class="sd"> &gt;&gt;&gt; }</span>
<span class="sd"> &gt;&gt;&gt; common={</span>
<span class="sd"> &gt;&gt;&gt; &#39;max_sample_size&#39;: 1000,</span>
<span class="sd"> &gt;&gt;&gt; &#39;n_jobs&#39;: -1,</span>
<span class="sd"> &gt;&gt;&gt; &#39;param_grid&#39;: param_grid,</span>
<span class="sd"> &gt;&gt;&gt; &#39;param_mod_sel&#39;: param_mod_sel,</span>
<span class="sd"> &gt;&gt;&gt; }</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(LogisticRegression(), PACC, optim=&#39;mae&#39;, policy=&#39;mae&#39;, **common)</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param base_quantifier_class: a class of quantifiers</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">param_grid</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;param_grid is None but optim was requested.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">param_model_sel</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;param_model_sel is None but optim was requested.&#39;</span><span class="p">)</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">_check_error</span><span class="p">(</span><span class="n">optim</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">error</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="ECC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ECC">[docs]</a>
<span class="k">def</span> <span class="nf">ECC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.CC` quantifiers, as used by</span>
<span class="sd"> `Pérez-Gállego et al., 2019 &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)</span>
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">CC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="EACC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EACC">[docs]</a>
<span class="k">def</span> <span class="nf">EACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.ACC` quantifiers, as used by</span>
<span class="sd"> `Pérez-Gállego et al., 2019 &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)</span>
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="EPACC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EPACC">[docs]</a>
<span class="k">def</span> <span class="nf">EPACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.PACC` quantifiers.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)</span>
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="EHDy">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EHDy">[docs]</a>
<span class="k">def</span> <span class="nf">EHDy</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.HDy` quantifiers, as used by</span>
<span class="sd"> `Pérez-Gállego et al., 2019 &lt;https://www.sciencedirect.com/science/article/pii/S1566253517303652&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)</span>
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="EEMQ">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EEMQ">[docs]</a>
<span class="k">def</span> <span class="nf">EEMQ</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.EMQ` quantifiers.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)</span>
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
<span class="sd"> :param classifier: sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,286 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method.non_aggregative &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method.non_aggregative</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method.non_aggregative</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">get_divergence</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation">[docs]</a>
<span class="k">class</span> <span class="nc">MaximumLikelihoodPrevalenceEstimation</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior</span>
<span class="sd"> probability shift between training and test instances (put it other way, that the i.i.d. assumpion holds).</span>
<span class="sd"> The estimation of class prevalence values for any test sample is always (i.e., irrespective of the test sample</span>
<span class="sd"> itself) the class prevalence seen during training. This method is considered to be a lower-bound quantifier that</span>
<span class="sd"> any quantification method should beat.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the training prevalence and stores it.</span>
<span class="sd"> :param data: the training sample</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Ignores the input instances and returns, as the class prevalence estimantes, the training prevalence.</span>
<span class="sd"> :param instances: array-like (ignored)</span>
<span class="sd"> :return: the class prevalence seen during training</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span></div>
</div>
<div class="viewcode-block" id="DMx">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx">[docs]</a>
<span class="k">class</span> <span class="nc">DMx</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates.</span>
<span class="sd"> This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters.</span>
<span class="sd"> :param nbins: number of bins used to discretize the distributions (default 8)</span>
<span class="sd"> :param divergence: a string representing a divergence measure (currently, &quot;HD&quot; and &quot;topsoe&quot; are implemented)</span>
<span class="sd"> or a callable function taking two ndarrays of the same dimension as input (default &quot;HD&quot;, meaning Hellinger</span>
<span class="sd"> Distance)</span>
<span class="sd"> :param cdf: whether to use CDF instead of PDF (default False)</span>
<span class="sd"> :param n_jobs: number of parallel workers (default None)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span><span class="s1">&#39;HD&#39;</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">&#39;optim_minimize&#39;</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nbins</span> <span class="o">=</span> <span class="n">nbins</span>
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cdf</span> <span class="o">=</span> <span class="n">cdf</span>
<span class="bp">self</span><span class="o">.</span><span class="n">search</span> <span class="o">=</span> <span class="n">search</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<div class="viewcode-block" id="DMx.HDx">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.HDx">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">HDx</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Hellinger Distance x &lt;https://www.sciencedirect.com/science/article/pii/S0020025512004069&gt;`_ (HDx).</span>
<span class="sd"> HDx is a method for training binary quantifiers, that models quantification as the problem of</span>
<span class="sd"> minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized</span>
<span class="sd"> histograms of two representations, one for the unlabelled examples, and another generated from the training</span>
<span class="sd"> examples as a mixture model of the class-specific representations. The parameters of the mixture thus represent</span>
<span class="sd"> the estimates of the class prevalence values.</span>
<span class="sd"> The method computes all matchings for nbins in [10, 20, ..., 110] and reports the mean of the median.</span>
<span class="sd"> The best prevalence is searched via linear search, from 0 to 1 stepping by 0.01.</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :return: an instance of this class setup to mimick the performance of the HDx as originally proposed by</span>
<span class="sd"> González-Castro, Alaiz-Rodríguez, Alegre (2013)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
<span class="n">dmx</span> <span class="o">=</span> <span class="n">DMx</span><span class="p">(</span><span class="n">divergence</span><span class="o">=</span><span class="s1">&#39;HD&#39;</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">&#39;linear_search&#39;</span><span class="p">)</span>
<span class="n">nbins</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;nbins&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)}</span>
<span class="n">hdx</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">base_quantifier</span><span class="o">=</span><span class="n">dmx</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">nbins</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">hdx</span></div>
<span class="k">def</span> <span class="nf">__get_distributions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="n">histograms</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">feat_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">):</span>
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">feat_idx</span><span class="p">]</span>
<span class="n">feat_range</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span><span class="p">[</span><span class="n">feat_idx</span><span class="p">]</span>
<span class="n">hist</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">feature</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nbins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="n">feat_range</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">norm_hist</span> <span class="o">=</span> <span class="n">hist</span> <span class="o">/</span> <span class="n">hist</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="n">histograms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">norm_hist</span><span class="p">)</span>
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">histograms</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdf</span><span class="p">:</span>
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">distributions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">distributions</span>
<div class="viewcode-block" id="DMx.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates the validation distributions out of the training data (covariates).</span>
<span class="sd"> The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats`</span>
<span class="sd"> the number of features, and `nbins` the number of bins.</span>
<span class="sd"> In particular, let `V` be the validation distributions; then `di=V[i]` are the distributions obtained from</span>
<span class="sd"> training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in</span>
<span class="sd"> training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin.</span>
<span class="sd"> :param data: the training set</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span> <span class="o">=</span> <span class="n">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">cat</span><span class="p">])</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)]</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DMx.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution</span>
<span class="sd"> (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.</span>
<span class="sd"> The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice)</span>
<span class="sd"> between all feature-specific discrete distributions.</span>
<span class="sd"> :param instances: instances in the sample</span>
<span class="sd"> :return: a vector of class prevalence estimates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;wrong shape; expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">test_distribution</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
<span class="n">n_classes</span><span class="p">,</span> <span class="n">n_feats</span><span class="p">,</span> <span class="n">nbins</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">shape</span>
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="n">prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">expand_dims</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">mixture_distribution</span> <span class="o">=</span> <span class="p">(</span><span class="n">prev</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_feats</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">divs</span> <span class="o">=</span> <span class="p">[</span><span class="n">divergence</span><span class="p">(</span><span class="n">test_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">],</span> <span class="n">mixture_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">])</span> <span class="k">for</span> <span class="n">feat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_feats</span><span class="p">)]</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">divs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">search</span><span class="p">)</span></div>
</div>
<span class="k">def</span> <span class="nf">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">):</span>
<span class="n">feat_ranges</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">ncols</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">for</span> <span class="n">col_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">ncols</span><span class="p">):</span>
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span><span class="n">col_idx</span><span class="p">]</span>
<span class="n">feat_ranges</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">feature</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">feature</span><span class="p">)))</span>
<span class="k">return</span> <span class="n">feat_ranges</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="c1"># aliases</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="n">DistributionMatchingX</span> <span class="o">=</span> <span class="n">DMx</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,554 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.model_selection &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.model_selection</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.model_selection</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">import</span> <span class="nn">signal</span>
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">enum</span> <span class="kn">import</span> <span class="n">Enum</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">wraps</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn</span> <span class="kn">import</span> <span class="n">clone</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">evaluation</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span>
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">timeout</span>
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
<div class="viewcode-block" id="Status">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.Status">[docs]</a>
<span class="k">class</span> <span class="nc">Status</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
<span class="n">SUCCESS</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">TIMEOUT</span> <span class="o">=</span> <span class="mi">2</span>
<span class="n">INVALID</span> <span class="o">=</span> <span class="mi">3</span>
<span class="n">ERROR</span> <span class="o">=</span> <span class="mi">4</span></div>
<div class="viewcode-block" id="ConfigStatus">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus">[docs]</a>
<span class="k">class</span> <span class="nc">ConfigStatus</span><span class="p">:</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">msg</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="n">status</span>
<span class="bp">self</span><span class="o">.</span><span class="n">msg</span> <span class="o">=</span> <span class="n">msg</span>
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="sa">f</span><span class="s1">&#39;:params:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="si">}</span><span class="s1"> :status:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="si">}</span><span class="s1"> &#39;</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<div class="viewcode-block" id="ConfigStatus.success">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.success">[docs]</a>
<span class="k">def</span> <span class="nf">success</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">==</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div>
<div class="viewcode-block" id="ConfigStatus.failed">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.failed">[docs]</a>
<span class="k">def</span> <span class="nf">failed</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">!=</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div>
</div>
<div class="viewcode-block" id="GridSearchQ">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ">[docs]</a>
<span class="k">class</span> <span class="nc">GridSearchQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Grid Search optimization targeting a quantification-oriented metric.</span>
<span class="sd"> Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation</span>
<span class="sd"> protocol for quantification.</span>
<span class="sd"> :param model: the quantifier to optimize</span>
<span class="sd"> :type model: BaseQuantifier</span>
<span class="sd"> :param param_grid: a dictionary with keys the parameter names and values the list of values to explore</span>
<span class="sd"> :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol`</span>
<span class="sd"> :param error: an error function (callable) or a string indicating the name of an error function (valid ones</span>
<span class="sd"> are those in :class:`quapy.error.QUANTIFICATION_ERROR`</span>
<span class="sd"> :param refit: whether to refit the model on the whole labelled collection (training+validation) with</span>
<span class="sd"> the best chosen hyperparameter combination. Ignored if protocol=&#39;gen&#39;</span>
<span class="sd"> :param timeout: establishes a timer (in seconds) for each of the hyperparameters configurations being tested.</span>
<span class="sd"> Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up</span>
<span class="sd"> being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set.</span>
<span class="sd"> :param raise_errors: boolean, if True then raises an exception when a param combination yields any error, if</span>
<span class="sd"> otherwise is False (default), then the combination is marked with an error status, but the process goes on.</span>
<span class="sd"> However, if no configuration yields a valid model, then a ValueError exception will be raised.</span>
<span class="sd"> :param verbose: set to True to get information through the stdout</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
<span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span>
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
<span class="n">error</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Callable</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">,</span>
<span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">raise_errors</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">model</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
<span class="bp">self</span><span class="o">.</span><span class="n">protocol</span> <span class="o">=</span> <span class="n">protocol</span>
<span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="o">=</span> <span class="n">refit</span>
<span class="bp">self</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span> <span class="o">=</span> <span class="n">raise_errors</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__check_error</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">AbstractProtocol</span><span class="p">),</span> <span class="s1">&#39;unknown protocol&#39;</span>
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">]: </span><span class="si">{</span><span class="n">msg</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__check_error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error</span><span class="p">):</span>
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="s1">&#39;__call__&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">&#39;</span>
<span class="sa">f</span><span class="s1">&#39;the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_prepare_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">):</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">cls_params</span><span class="p">):</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">predictions</span>
<span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[classifier fit] hyperparams=</span><span class="si">{</span><span class="n">cls_params</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
<span class="k">def</span> <span class="nf">_prepare_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">cls_took</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">,</span> <span class="n">q_params</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">cls_params</span><span class="p">,</span> <span class="o">**</span><span class="n">q_params</span><span class="p">}</span>
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">q_params</span><span class="p">):</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
<span class="k">return</span> <span class="n">score</span>
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">q_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="p">(</span><span class="n">cls_took</span><span class="o">+</span><span class="n">aggr_took</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_prepare_nonaggr_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
<span class="k">return</span> <span class="n">score</span>
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
<span class="k">def</span> <span class="nf">_break_down_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Decides whether to break down the fit phase in two (classifier-fit followed by aggregation-fit).</span>
<span class="sd"> In order to do so, some conditions should be met: a) the quantifier is of type aggregative,</span>
<span class="sd"> b) the set of hyperparameters can be split into two disjoint non-empty groups.</span>
<span class="sd"> :return: True if the conditions are met, False otherwise</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">q_configs</span><span class="p">)</span><span class="o">==</span><span class="mi">1</span><span class="p">):</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">_compute_scores_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
<span class="c1"># break down the set of hyperparameters into two: classifier-specific, quantifier-specific</span>
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="c1"># train all classifiers and get the predictions</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
<span class="n">cls_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_classifier</span><span class="p">,</span>
<span class="n">cls_configs</span><span class="p">,</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
<span class="p">)</span>
<span class="c1"># filter out classifier configurations that yielded any error</span>
<span class="n">success_outs</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">),</span> <span class="n">cls_config</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">cls_outs</span><span class="p">,</span> <span class="n">cls_configs</span><span class="p">):</span>
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
<span class="n">success_outs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">took</span><span class="p">,</span> <span class="n">cls_config</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">success_outs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;No valid configuration found for the classifier!&#39;</span><span class="p">)</span>
<span class="c1"># explore the quantifier-specific hyperparameters for each valid training configuration</span>
<span class="n">aggr_configs</span> <span class="o">=</span> <span class="p">[(</span><span class="o">*</span><span class="n">out</span><span class="p">,</span> <span class="n">q_config</span><span class="p">)</span> <span class="k">for</span> <span class="n">out</span><span class="p">,</span> <span class="n">q_config</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">success_outs</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)]</span>
<span class="n">aggr_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_aggregation</span><span class="p">,</span>
<span class="n">aggr_configs</span><span class="p">,</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">aggr_outs</span>
<span class="k">def</span> <span class="nf">_compute_scores_nonaggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
<span class="n">configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_nonaggr_model</span><span class="p">,</span>
<span class="n">configs</span><span class="p">,</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">scores</span>
<span class="k">def</span> <span class="nf">_print_status</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">):</span>
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;hyperparams=[</span><span class="si">{</span><span class="n">params</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1"> got </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> = </span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error=</span><span class="si">{</span><span class="n">status</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="GridSearchQ.fit">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing</span>
<span class="sd"> the error metric.</span>
<span class="sd"> :param training: the training set on which to optimize the hyperparameters</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span>
<span class="sa">f</span><span class="s1">&#39;&quot;refit&quot; was requested, but the protocol does not implement &#39;</span>
<span class="sa">f</span><span class="s1">&#39;the </span><span class="si">{</span><span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> interface&#39;</span>
<span class="p">)</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;starting model selection with n_jobs=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_break_down_fit</span><span class="p">():</span>
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_aggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_nonaggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span> <span class="o">=</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="ow">in</span> <span class="n">results</span><span class="p">:</span>
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o">&lt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="n">score</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span> <span class="o">=</span> <span class="n">params</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span> <span class="o">=</span> <span class="n">model</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">score</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">status</span><span class="o">.</span><span class="n">status</span>
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;no combination of hyperparameters seemed to work&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;optimization finished: best params </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span><span class="si">}</span><span class="s1"> (score=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1">) &#39;</span>
<span class="sa">f</span><span class="s1">&#39;[took </span><span class="si">{</span><span class="n">tend</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s]&#39;</span><span class="p">)</span>
<span class="n">no_errors</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">)</span>
<span class="k">if</span> <span class="n">no_errors</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;warning: </span><span class="si">{</span><span class="n">no_errors</span><span class="si">}</span><span class="s1"> errors found&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">err</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="si">{</span><span class="nb">str</span><span class="p">(</span><span class="n">err</span><span class="p">)</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;refitting on the whole development set&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
<span class="bp">self</span><span class="o">.</span><span class="n">refit_time_</span> <span class="o">=</span> <span class="n">tend</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># already checked</span>
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;the model cannot be refit on the whole dataset&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="GridSearchQ.quantify">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Estimate class prevalence values using the best model found after calling the :meth:`fit` method.</span>
<span class="sd"> :param instances: sample contanining the instances</span>
<span class="sd"> :return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found</span>
<span class="sd"> by the model selection process.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;best_model_&#39;</span><span class="p">),</span> <span class="s1">&#39;quantify called before fit&#39;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span></div>
<div class="viewcode-block" id="GridSearchQ.set_params">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Sets the hyper-parameters to explore.</span>
<span class="sd"> :param parameters: a dictionary with keys the parameter names and values the list of values to explore</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">parameters</span></div>
<div class="viewcode-block" id="GridSearchQ.get_params">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns the dictionary of hyper-parameters to explore (`param_grid`)</span>
<span class="sd"> :param deep: Unused</span>
<span class="sd"> :return: the dictionary `param_grid`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span></div>
<div class="viewcode-block" id="GridSearchQ.best_model">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.best_model">[docs]</a>
<span class="k">def</span> <span class="nf">best_model</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination</span>
<span class="sd"> of hyper-parameters that minimized the error function.</span>
<span class="sd"> :return: a trained quantifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;best_model_&#39;</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;best_model called before fit&#39;</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_error_handler</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Endorses one job with two returned values: the status, and the time of execution</span>
<span class="sd"> :param func: the function to be called</span>
<span class="sd"> :param params: parameters of the function</span>
<span class="sd"> :return: `tuple(out, status, time)` where `out` is the function output,</span>
<span class="sd"> `status` is an enum value from `Status`, and `time` is the time it</span>
<span class="sd"> took to complete the call</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">output</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">def</span> <span class="nf">_handle</span><span class="p">(</span><span class="n">status</span><span class="p">,</span> <span class="n">exception</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">exception</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">with</span> <span class="n">timeout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">timeout</span><span class="p">):</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">output</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">TimeoutError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">TIMEOUT</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">INVALID</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">ERROR</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="n">took</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
<span class="k">return</span> <span class="n">output</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span></div>
<div class="viewcode-block" id="cross_val_predict">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.cross_val_predict">[docs]</a>
<span class="k">def</span> <span class="nf">cross_val_predict</span><span class="p">(</span><span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Akin to `scikit-learn&#39;s cross_val_predict &lt;https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_predict.html&gt;`_</span>
<span class="sd"> but for quantification.</span>
<span class="sd"> :param quantifier: a quantifier issuing class prevalence values</span>
<span class="sd"> :param data: a labelled collection</span>
<span class="sd"> :param nfolds: number of folds for k-fold cross validation generation</span>
<span class="sd"> :param random_state: random seed for reproducibility</span>
<span class="sd"> :return: a vector of class prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">total_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
<span class="k">for</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
<span class="n">fold_prev</span> <span class="o">=</span> <span class="n">quantifier</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
<span class="n">rel_size</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">test</span><span class="p">)</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">total_prev</span> <span class="o">+=</span> <span class="n">fold_prev</span><span class="o">*</span><span class="n">rel_size</span>
<span class="k">return</span> <span class="n">total_prev</span></div>
<div class="viewcode-block" id="expand_grid">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.expand_grid">[docs]</a>
<span class="k">def</span> <span class="nf">expand_grid</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Expands a param_grid dictionary as a list of configurations.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; combinations = expand_grid({&#39;A&#39;: [1, 10, 100], &#39;B&#39;: [True, False]})</span>
<span class="sd"> &gt;&gt;&gt; print(combinations)</span>
<span class="sd"> &gt;&gt;&gt; [{&#39;A&#39;: 1, &#39;B&#39;: True}, {&#39;A&#39;: 1, &#39;B&#39;: False}, {&#39;A&#39;: 10, &#39;B&#39;: True}, {&#39;A&#39;: 10, &#39;B&#39;: False}, {&#39;A&#39;: 100, &#39;B&#39;: True}, {&#39;A&#39;: 100, &#39;B&#39;: False}]</span>
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
<span class="sd"> to explore for that hyper-parameter</span>
<span class="sd"> :return: a list of configurations, i.e., combinations of hyper-parameter assignments in the grid.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">params_keys</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">params_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
<span class="n">configs</span> <span class="o">=</span> <span class="p">[{</span><span class="n">k</span><span class="p">:</span> <span class="n">combs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">params_keys</span><span class="p">)}</span> <span class="k">for</span> <span class="n">combs</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">params_values</span><span class="p">)]</span>
<span class="k">return</span> <span class="n">configs</span></div>
<div class="viewcode-block" id="group_params">
<a class="viewcode-back" href="../../quapy.html#quapy.model_selection.group_params">[docs]</a>
<span class="k">def</span> <span class="nf">group_params</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific</span>
<span class="sd"> hyper-parameters, and another for que quantifier-specific hyper-parameters</span>
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
<span class="sd"> to explore for that hyper-parameter</span>
<span class="sd"> :return: two expanded grids of configurations, one for the classifier, another for the quantifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">classifier_params</span><span class="p">,</span> <span class="n">quantifier_params</span> <span class="o">=</span> <span class="p">{},</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">values</span> <span class="ow">in</span> <span class="n">param_grid</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;classifier__&#39;</span><span class="p">)</span> <span class="ow">or</span> <span class="n">key</span> <span class="o">==</span> <span class="s1">&#39;val_split&#39;</span><span class="p">:</span>
<span class="n">classifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">quantifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
<span class="n">classifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">classifier_params</span><span class="p">)</span>
<span class="n">quantifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">quantifier_params</span><span class="p">)</span>
<span class="k">return</span> <span class="n">classifier_configs</span><span class="p">,</span> <span class="n">quantifier_configs</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,687 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.plot &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.plot</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.plot</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
<span class="kn">from</span> <span class="nn">matplotlib.cm</span> <span class="kn">import</span> <span class="n">get_cmap</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
<span class="kn">from</span> <span class="nn">scipy.stats</span> <span class="kn">import</span> <span class="n">ttest_ind_from_stats</span>
<span class="kn">from</span> <span class="nn">matplotlib.ticker</span> <span class="kn">import</span> <span class="n">ScalarFormatter</span>
<span class="kn">import</span> <span class="nn">math</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">&#39;figure.figsize&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">6</span><span class="p">]</span>
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">&#39;figure.dpi&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">200</span>
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">&#39;font.size&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">18</span>
<div class="viewcode-block" id="binary_diagonal">
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_diagonal">[docs]</a>
<span class="k">def</span> <span class="nf">binary_diagonal</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">train_prev</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence</span>
<span class="sd"> values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the</span>
<span class="sd"> name). It is convenient for binary quantification problems, though it can be used for multiclass problems by</span>
<span class="sd"> indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots</span>
<span class="sd"> like the :meth:`error_by_drift` might be preferable though).</span>
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
<span class="sd"> each experiment</span>
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
<span class="sd"> for each experiment</span>
<span class="sd"> :param pos_class: index of the positive class</span>
<span class="sd"> :param title: the title to be displayed in the plot</span>
<span class="sd"> :param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient</span>
<span class="sd"> for cases in which many methods are compared, or when the standard deviations are high -- default True)</span>
<span class="sd"> :param legend: whether or not to display the leyend (default True)</span>
<span class="sd"> :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is hightlighted</span>
<span class="sd"> in the plot. This is convenient when all the experiments have been conducted in the same dataset.</span>
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="s1">&#39;equal&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;--k&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">&#39;ideal&#39;</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
<span class="n">order</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">))</span>
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">table</span> <span class="o">=</span> <span class="p">{</span><span class="n">method_name</span><span class="p">:[</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">]</span> <span class="k">for</span> <span class="n">method_name</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">}</span>
<span class="n">order</span> <span class="o">=</span> <span class="p">[(</span><span class="n">method_name</span><span class="p">,</span> <span class="o">*</span><span class="n">table</span><span class="p">[</span><span class="n">method_name</span><span class="p">])</span> <span class="k">for</span> <span class="n">method_name</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
<span class="k">if</span> <span class="n">NUM_COLORS</span><span class="o">&gt;</span><span class="mi">10</span><span class="p">:</span>
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">&#39;tab20&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">:</span>
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">x_ticks</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">true_prev</span><span class="p">)</span>
<span class="n">x_ticks</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<span class="n">y_ave</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
<span class="n">y_std</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">std</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">&#39;-&#39;</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">&#39;o&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">-</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">+</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
<span class="k">if</span> <span class="n">train_prev</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">train_prev</span> <span class="o">=</span> <span class="n">train_prev</span><span class="p">[</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">ax</span><span class="o">.</span><span class="n">scatter</span><span class="p">(</span><span class="n">train_prev</span><span class="p">,</span> <span class="n">train_prev</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">&#39;tr-prev&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">,</span> <span class="n">s</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">&#39;true prevalence&#39;</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">&#39;estimated prevalence&#39;</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">&#39;center left&#39;</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
<span class="c1"># box = ax.get_position()</span>
<span class="c1"># ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])</span>
<span class="c1"># ax.legend(loc=&#39;lower center&#39;,</span>
<span class="c1"># bbox_to_anchor=(1, -0.5),</span>
<span class="c1"># ncol=(len(method_names)+1)//2)</span>
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
<div class="viewcode-block" id="binary_bias_global">
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_global">[docs]</a>
<span class="k">def</span> <span class="nf">binary_bias_global</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)</span>
<span class="sd"> for each quantification method with respect to a given positive class.</span>
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
<span class="sd"> each experiment</span>
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
<span class="sd"> for each experiment</span>
<span class="sd"> :param pos_class: index of the positive class</span>
<span class="sd"> :param title: the title to be displayed in the plot</span>
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
<span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="o">-</span><span class="n">true_prev</span><span class="p">)</span>
<span class="n">labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">boxplot</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">xticks</span><span class="p">(</span><span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">ylabel</span><span class="o">=</span><span class="s1">&#39;error bias&#39;</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
<div class="viewcode-block" id="binary_bias_bins">
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_bins">[docs]</a>
<span class="k">def</span> <span class="nf">binary_bias_bins</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">colormap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">tab10</span><span class="p">,</span>
<span class="n">vertical_xticks</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)</span>
<span class="sd"> for different bins of (true) prevalence of the positive classs, for each quantification method.</span>
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
<span class="sd"> each experiment</span>
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
<span class="sd"> for each experiment</span>
<span class="sd"> :param pos_class: index of the positive class</span>
<span class="sd"> :param title: the title to be displayed in the plot</span>
<span class="sd"> :param nbins: number of bins</span>
<span class="sd"> :param colormap: the matplotlib colormap to use (default cm.tab10)</span>
<span class="sd"> :param vertical_xticks: whether or not to add secondary grid (default is False)</span>
<span class="sd"> :param legend: whether or not to display the legend (default is True)</span>
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pylab</span> <span class="kn">import</span> <span class="n">boxplot</span><span class="p">,</span> <span class="n">plot</span><span class="p">,</span> <span class="n">setp</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nbins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">nbins</span>
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">]</span> <span class="o">-</span> <span class="n">true_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
<span class="n">nmethods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
<span class="n">boxwidth</span> <span class="o">=</span> <span class="n">binwidth</span><span class="o">/</span><span class="p">(</span><span class="n">nmethods</span><span class="o">+</span><span class="mi">4</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="nb">bin</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
<span class="n">boxdata</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_names</span><span class="p">]</span>
<span class="n">positions</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bin</span><span class="o">+</span><span class="p">(</span><span class="n">i</span><span class="o">*</span><span class="n">boxwidth</span><span class="p">)</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">boxwidth</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_names</span><span class="p">)]</span>
<span class="n">box</span> <span class="o">=</span> <span class="n">boxplot</span><span class="p">(</span><span class="n">boxdata</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">positions</span><span class="o">=</span><span class="n">positions</span><span class="p">,</span> <span class="n">widths</span> <span class="o">=</span> <span class="n">boxwidth</span><span class="p">,</span> <span class="n">sym</span><span class="o">=</span><span class="s1">&#39;+&#39;</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">boxid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">boxid</span><span class="o">%</span><span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">&#39;fliers&#39;</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">&#39;+&#39;</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mf">3.</span><span class="p">,</span> <span class="n">markeredgecolor</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">&#39;boxes&#39;</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">&#39;medians&#39;</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">major_xticks_positions</span><span class="p">,</span> <span class="n">minor_xticks_positions</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="n">major_xticks_labels</span><span class="p">,</span> <span class="n">minor_xticks_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
<span class="n">major_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
<span class="n">minor_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span> <span class="o">+</span> <span class="n">binwidth</span> <span class="o">/</span> <span class="mi">2</span><span class="p">)</span>
<span class="n">major_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
<span class="n">minor_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">-</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">)&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">major_xticks_positions</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">minor_xticks_positions</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">major_xticks_labels</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">minor_xticks_labels</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="s1">&#39;vertical&#39;</span> <span class="k">if</span> <span class="n">vertical_xticks</span> <span class="k">else</span> <span class="s1">&#39;horizontal&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">vertical_xticks</span><span class="p">:</span>
<span class="c1"># Pad margins so that markers don&#39;t get clipped by the axes</span>
<span class="n">plt</span><span class="o">.</span><span class="n">margins</span><span class="p">(</span><span class="mf">0.2</span><span class="p">)</span>
<span class="c1"># Tweak spacing to prevent clipping of tick-labels</span>
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">bottom</span><span class="o">=</span><span class="mf">0.15</span><span class="p">)</span>
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
<span class="c1"># adds the legend to the list hs, initialized with the &quot;ideal&quot; quantifier (one that has 0 bias across all bins. i.e.</span>
<span class="c1"># a line from (0,0) to (1,0). The other elements are simply labelled dot-plots that are to be removed (setting</span>
<span class="c1"># set_visible to False for all but the first element) after the legend has been placed</span>
<span class="n">hs</span><span class="o">=</span><span class="p">[</span><span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;-k&#39;</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">for</span> <span class="n">colorid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
<span class="n">color</span><span class="o">=</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">colorid</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
<span class="n">h</span><span class="p">,</span> <span class="o">=</span> <span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;-s&#39;</span><span class="p">,</span> <span class="n">markerfacecolor</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">,</span><span class="n">mec</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.</span><span class="p">)</span>
<span class="n">hs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">hs</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;ideal&#39;</span><span class="p">]</span><span class="o">+</span><span class="n">method_names</span><span class="p">,</span> <span class="n">loc</span><span class="o">=</span><span class="s1">&#39;center left&#39;</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
<span class="p">[</span><span class="n">h</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">h</span> <span class="ow">in</span> <span class="n">hs</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]</span>
<span class="c1"># x-axis and y-axis labels and limits</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">&#39;prevalence&#39;</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">&#39;error bias&#39;</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
<div class="viewcode-block" id="error_by_drift">
<a class="viewcode-back" href="../../quapy.html#quapy.plot.error_by_drift">[docs]</a>
<span class="k">def</span> <span class="nf">error_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">error_name</span><span class="o">=</span><span class="s1">&#39;ae&#39;</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">show_density</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">show_legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">logscale</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">title</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;Quantification error as a function of distribution shift&#39;</span><span class="p">,</span>
<span class="n">vlines</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift</span>
<span class="sd"> (along the y-axis, as measured in terms of :meth:`quapy.error.ae`). This plot is useful especially for multiclass</span>
<span class="sd"> problems, in which &quot;diagonal plots&quot; may be cumbersone, and in order to gain understanding about how methods</span>
<span class="sd"> fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the</span>
<span class="sd"> high-shift regime).</span>
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
<span class="sd"> each experiment</span>
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
<span class="sd"> for each experiment</span>
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
<span class="sd"> :param n_bins: number of bins in which the y-axis is to be divided (default is 20)</span>
<span class="sd"> :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is &quot;ae&quot;)</span>
<span class="sd"> :param show_std: whether or not to show standard deviations as color bands (default is False)</span>
<span class="sd"> :param show_density: whether or not to display the distribution of experiments for each bin (default is True)</span>
<span class="sd"> :param show_density: whether or not to display the legend of the chart (default is True)</span>
<span class="sd"> :param logscale: whether or not to log-scale the y-error measure (default is False)</span>
<span class="sd"> :param title: title of the plot (default is &quot;Quantification error as a function of distribution shift&quot;)</span>
<span class="sd"> :param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space</span>
<span class="sd"> using vertical dotted lines.</span>
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
<span class="n">x_error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ae</span>
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">error_name</span><span class="p">)</span>
<span class="c1"># get all data as a dictionary {&#39;m&#39;:{&#39;x&#39;:ndarray, &#39;y&#39;:ndarray}} where &#39;m&#39; is a method name (in the same</span>
<span class="c1"># order as in method_order (if specified), and where &#39;x&#39; are the train-test shifts (computed as according to</span>
<span class="c1"># x_error function) and &#39;y&#39; is the estim-test shift (computed as according to y_error)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
<span class="n">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">))</span>
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">n_bins</span>
<span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">,</span> <span class="n">min_y</span><span class="p">,</span> <span class="n">max_y</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="n">npoints</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;x&#39;</span><span class="p">]</span>
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;y&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_yscale</span><span class="p">(</span><span class="s2">&quot;log&quot;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">set_major_formatter</span><span class="p">(</span><span class="n">ScalarFormatter</span><span class="p">())</span>
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">get_major_formatter</span><span class="p">()</span><span class="o">.</span><span class="n">set_scientific</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">minorticks_off</span><span class="p">()</span>
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">ystds</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">p</span><span class="p">,</span><span class="n">ind</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))):</span>
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">xs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ind</span><span class="o">*</span><span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
<span class="n">ys</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
<span class="n">ystds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
<span class="n">npoints</span><span class="p">[</span><span class="n">p</span><span class="p">]</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
<span class="n">xs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">xs</span><span class="p">)</span>
<span class="n">ys</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ys</span><span class="p">)</span>
<span class="n">ystds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ystds</span><span class="p">)</span>
<span class="n">min_x_method</span><span class="p">,</span> <span class="n">max_x_method</span><span class="p">,</span> <span class="n">min_y_method</span><span class="p">,</span> <span class="n">max_y_method</span> <span class="o">=</span> <span class="n">xs</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">xs</span><span class="o">.</span><span class="n">max</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="n">min_x</span> <span class="o">=</span> <span class="n">min_x_method</span> <span class="k">if</span> <span class="n">min_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_x_method</span> <span class="o">&lt;</span> <span class="n">min_x</span> <span class="k">else</span> <span class="n">min_x</span>
<span class="n">max_x</span> <span class="o">=</span> <span class="n">max_x_method</span> <span class="k">if</span> <span class="n">max_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_x_method</span> <span class="o">&gt;</span> <span class="n">max_x</span> <span class="k">else</span> <span class="n">max_x</span>
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">&gt;</span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
<span class="n">min_y</span> <span class="o">=</span> <span class="n">min_y_method</span> <span class="k">if</span> <span class="n">min_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_y_method</span> <span class="o">&lt;</span> <span class="n">min_y</span> <span class="k">else</span> <span class="n">min_y</span>
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">&gt;</span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">&#39;-&#39;</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">&#39;o&#39;</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;w&#39;</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">&#39;-&#39;</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">&#39;o&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">6</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="o">-</span><span class="n">ystds</span><span class="p">,</span> <span class="n">ys</span><span class="o">+</span><span class="n">ystds</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
<span class="k">if</span> <span class="n">show_density</span><span class="p">:</span>
<span class="n">ax2</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">twinx</span><span class="p">()</span>
<span class="n">densities</span> <span class="o">=</span> <span class="n">npoints</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">npoints</span><span class="p">)</span>
<span class="n">ax2</span><span class="o">.</span><span class="n">bar</span><span class="p">([</span><span class="n">ind</span> <span class="o">*</span> <span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))],</span>
<span class="n">densities</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.15</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;g&#39;</span><span class="p">,</span> <span class="n">width</span><span class="o">=</span><span class="n">binwidth</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">&#39;density&#39;</span><span class="p">)</span>
<span class="n">ax2</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="nb">max</span><span class="p">(</span><span class="n">densities</span><span class="p">))</span>
<span class="n">ax2</span><span class="o">.</span><span class="n">spines</span><span class="p">[</span><span class="s1">&#39;right&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">set_color</span><span class="p">(</span><span class="s1">&#39;g&#39;</span><span class="p">)</span>
<span class="n">ax2</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="n">colors</span><span class="o">=</span><span class="s1">&#39;g&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;Distribution shift between training set and test sample&#39;</span><span class="p">,</span>
<span class="n">ylabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">error_name</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span><span class="si">}</span><span class="s1"> (true distribution, predicted distribution)&#39;</span><span class="p">,</span>
<span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
<span class="k">if</span> <span class="n">vlines</span><span class="p">:</span>
<span class="k">for</span> <span class="n">vline</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">vline</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">&#39;--&#39;</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">)</span>
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
<span class="c1">#nice scale for the logaritmic axis</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">10</span> <span class="o">**</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">log10</span><span class="p">(</span><span class="n">max_y</span><span class="p">)))</span>
<span class="k">if</span> <span class="n">show_legend</span><span class="p">:</span>
<span class="n">fig</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">&#39;lower center&#39;</span><span class="p">,</span>
<span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span>
<span class="n">ncol</span><span class="o">=</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span><span class="p">)</span>
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
<div class="viewcode-block" id="brokenbar_supremacy_by_drift">
<a class="viewcode-back" href="../../quapy.html#quapy.plot.brokenbar_supremacy_by_drift">[docs]</a>
<span class="k">def</span> <span class="nf">brokenbar_supremacy_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">binning</span><span class="o">=</span><span class="s1">&#39;isomerous&#39;</span><span class="p">,</span>
<span class="n">x_error</span><span class="o">=</span><span class="s1">&#39;ae&#39;</span><span class="p">,</span> <span class="n">y_error</span><span class="o">=</span><span class="s1">&#39;ae&#39;</span><span class="p">,</span> <span class="n">ttest_alpha</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span> <span class="n">tail_density_threshold</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span>
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Displays (only) the top performing methods for different regions of the train-test shift in form of a broken</span>
<span class="sd"> bar chart, in which each method has bars only for those regions in which either one of the following conditions</span>
<span class="sd"> hold: (i) it is the best method (in average) for the bin, or (ii) it is not statistically significantly different</span>
<span class="sd"> (in average) as according to a two-sided t-test on independent samples at confidence `ttest_alpha`.</span>
<span class="sd"> The binning can be made &quot;isometric&quot; (same size), or &quot;isomerous&quot; (same number of experiments -- default). A second</span>
<span class="sd"> plot is displayed on top, that displays the distribution of experiments for each bin (when binning=&quot;isometric&quot;) or</span>
<span class="sd"> the percentiles points of the distribution (when binning=&quot;isomerous&quot;).</span>
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
<span class="sd"> each experiment</span>
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
<span class="sd"> for each experiment</span>
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
<span class="sd"> :param n_bins: number of bins in which the y-axis is to be divided (default is 20)</span>
<span class="sd"> :param binning: type of binning, either &quot;isomerous&quot; (default) or &quot;isometric&quot;</span>
<span class="sd"> :param x_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
<span class="sd"> measuring the amount of train-test shift (default is &quot;ae&quot;)</span>
<span class="sd"> :param y_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
<span class="sd"> measuring the amount of error in the prevalence estimations (default is &quot;ae&quot;)</span>
<span class="sd"> :param ttest_alpha: the confidence interval above which a p-value (two-sided t-test on independent samples) is</span>
<span class="sd"> to be considered as an indicator that the two means are not statistically significantly different. Default is</span>
<span class="sd"> 0.005, meaning that a `p-value &gt; 0.005` indicates the two methods involved are to be considered similar</span>
<span class="sd"> :param tail_density_threshold: sets a threshold on the density of experiments (over the total number of experiments)</span>
<span class="sd"> below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some</span>
<span class="sd"> bins to be shown for train-test outliers.</span>
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">binning</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;isomerous&#39;</span><span class="p">,</span> <span class="s1">&#39;isometric&#39;</span><span class="p">],</span> <span class="s1">&#39;unknown binning type; valid types are &quot;isomerous&quot; and &quot;isometric&quot;&#39;</span>
<span class="n">x_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">x_error</span><span class="p">)</span>
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">)</span>
<span class="c1"># get all data as a dictionary {&#39;m&#39;:{&#39;x&#39;:ndarray, &#39;y&#39;:ndarray}} where &#39;m&#39; is a method name (in the same</span>
<span class="c1"># order as in method_order (if specified), and where &#39;x&#39; are the train-test shifts (computed as according to</span>
<span class="c1"># x_error function) and &#39;y&#39; is the estim-test shift (computed as according to y_error)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">&#39;isomerous&#39;</span><span class="p">:</span>
<span class="c1"># take bins containing the same amount of examples</span>
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">&#39;x&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">])</span>
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">q</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># take equidistant bins</span>
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mf">0.001</span>
<span class="n">bins</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span> <span class="mf">0.001</span>
<span class="c1"># we use this to keep track of how many datapoits contribute to each bin</span>
<span class="n">inds_histogram_global</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">n_bins</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
<span class="n">n_methods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">)</span>
<span class="n">buckets</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_methods</span><span class="p">,</span> <span class="n">n_bins</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">method</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_order</span><span class="p">):</span>
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;x&#39;</span><span class="p">]</span>
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;y&#39;</span><span class="p">]</span>
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">inds_histogram_global</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">density</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span> <span class="o">==</span> <span class="n">j</span>
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="c1"># cancel last buckets with low density</span>
<span class="n">histogram</span> <span class="o">=</span> <span class="n">inds_histogram_global</span> <span class="o">/</span> <span class="n">inds_histogram_global</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">for</span> <span class="n">tail</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">histogram</span><span class="p">))):</span>
<span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">tail</span><span class="p">]</span> <span class="o">&lt;</span> <span class="n">tail_density_threshold</span><span class="p">:</span>
<span class="n">buckets</span><span class="p">[:,</span><span class="n">tail</span><span class="p">,</span><span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">break</span>
<span class="n">salient_methods</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<span class="n">best_methods</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">bucket</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">buckets</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]):</span>
<span class="n">nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">if</span> <span class="n">nc</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">([])</span>
<span class="k">continue</span>
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span>
<span class="n">rank1</span> <span class="o">=</span> <span class="n">order</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">best_bucket_methods</span> <span class="o">=</span> <span class="p">[</span><span class="n">method_order</span><span class="p">[</span><span class="n">rank1</span><span class="p">]]</span>
<span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">rank1</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
<span class="k">for</span> <span class="n">method_index</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
<span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">method_index</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
<span class="n">_</span><span class="p">,</span> <span class="n">pval</span> <span class="o">=</span> <span class="n">ttest_ind_from_stats</span><span class="p">(</span><span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span><span class="p">,</span> <span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span><span class="p">)</span>
<span class="k">if</span> <span class="n">pval</span> <span class="o">&gt;</span> <span class="n">ttest_alpha</span><span class="p">:</span>
<span class="n">best_bucket_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method_order</span><span class="p">[</span><span class="n">method_index</span><span class="p">])</span>
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
<span class="n">salient_methods</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
<span class="k">if</span> <span class="n">binning</span><span class="o">==</span><span class="s1">&#39;isomerous&#39;</span><span class="p">:</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;height_ratios&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">0.2</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;height_ratios&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">high_from</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">yticks</span><span class="p">,</span> <span class="n">yticks_method_names</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="n">color</span> <span class="o">=</span> <span class="n">get_cmap</span><span class="p">(</span><span class="s1">&#39;Accent&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">colors</span>
<span class="n">vlines</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">bar_high</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="n">m</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span> <span class="k">if</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">salient_methods</span><span class="p">]:</span>
<span class="n">broken_paths</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">best_bucket_methods</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">best_methods</span><span class="p">):</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">best_bucket_methods</span><span class="p">:</span>
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">path_start</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
<span class="n">path_end</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">path_start</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">path_end</span> <span class="o">+=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">(</span><span class="n">broken_paths</span><span class="p">,</span> <span class="p">(</span><span class="n">high_from</span><span class="p">,</span> <span class="n">bar_high</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="n">color</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)])</span>
<span class="n">yticks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">high_from</span><span class="o">+</span><span class="n">bar_high</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
<span class="n">high_from</span> <span class="o">+=</span> <span class="n">bar_high</span>
<span class="n">yticks_method_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
<span class="k">for</span> <span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="ow">in</span> <span class="n">broken_paths</span><span class="p">:</span>
<span class="n">vlines</span><span class="o">.</span><span class="n">extend</span><span class="p">([</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_start</span><span class="o">+</span><span class="n">path_end</span><span class="p">])</span>
<span class="n">vlines</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
<span class="n">vlines</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
<span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">v</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">&#39;--&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">high_from</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlabel</span><span class="p">(</span><span class="s1">&#39;Distribution shift between training set and sample&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticks</span><span class="p">(</span><span class="n">yticks</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticklabels</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)</span>
<span class="c1"># upper plot (explaining distribution)</span>
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">&#39;isometric&#39;</span><span class="p">:</span>
<span class="c1"># show the density for each region</span>
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">=</span><span class="mi">0</span>
<span class="n">y_pos</span> <span class="o">=</span> <span class="p">[</span><span class="n">b</span><span class="o">+</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">)</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">]</span>
<span class="n">bar_width</span> <span class="o">=</span> <span class="p">[</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">]</span>
<span class="n">ax</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">y_pos</span><span class="p">,</span> <span class="p">[</span><span class="n">n</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">histogram</span> <span class="k">if</span> <span class="n">n</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">],</span> <span class="n">bar_width</span><span class="p">,</span> <span class="n">align</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;silver&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;shift</span><span class="se">\n</span><span class="s1">distribution&#39;</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">&#39;right&#39;</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># show the percentiles of the distribution</span>
<span class="n">cumsum</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">histogram</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])):</span>
<span class="n">start</span><span class="p">,</span> <span class="n">width</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">([</span><span class="nb">tuple</span><span class="p">((</span><span class="n">start</span><span class="p">,</span> <span class="n">width</span><span class="p">))],</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="s1">&#39;whitesmoke&#39;</span> <span class="k">if</span> <span class="n">i</span><span class="o">%</span><span class="mi">2</span><span class="o">==</span><span class="mi">0</span> <span class="k">else</span> <span class="s1">&#39;silver&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)</span><span class="o">-</span><span class="mi">2</span><span class="p">:</span>
<span class="n">ax</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">&#39;$P_{&#39;</span><span class="o">+</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">cumsum</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">*</span><span class="mi">100</span><span class="p">))</span><span class="si">}</span><span class="s1">&#39;</span><span class="o">+</span><span class="s1">&#39;}$&#39;</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">ax</span><span class="o">.</span><span class="n">get_yaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
<span class="n">ndims</span> <span class="o">=</span> <span class="n">true_prevs</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;true&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">)),</span> <span class="s1">&#39;estim&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">))})</span>
<span class="n">method_order</span><span class="o">=</span><span class="p">[]</span>
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;true&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;true&#39;</span><span class="p">],</span> <span class="n">true_prev</span><span class="p">])</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;estim&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;estim&#39;</span><span class="p">],</span> <span class="n">estim_prev</span><span class="p">])</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
<span class="n">true_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">&#39;true&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
<span class="n">estim_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">&#39;estim&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
<span class="k">return</span> <span class="n">method_order</span><span class="p">,</span> <span class="n">true_prevs_</span><span class="p">,</span> <span class="n">estim_prevs_</span>
<span class="k">def</span> <span class="nf">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="p">):</span>
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="n">n_methods</span>
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">&#39;tab20&#39;</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
<span class="k">def</span> <span class="nf">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">):</span>
<span class="c1"># if savepath is specified, then saves the plot in that path; otherwise the plot is shown</span>
<span class="k">if</span> <span class="n">savepath</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">create_parent_dir</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span>
<span class="c1"># plt.tight_layout()</span>
<span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="n">savepath</span><span class="p">,</span> <span class="n">bbox_inches</span><span class="o">=</span><span class="s1">&#39;tight&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;x&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span> <span class="s1">&#39;y&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">))})</span>
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">method_order</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">):</span>
<span class="n">tr_prev_i</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">tr_prev_i</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">),</span> <span class="n">repeats</span><span class="o">=</span><span class="n">test_prevs_i</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">x_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span><span class="p">)</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;x&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;x&#39;</span><span class="p">],</span> <span class="n">tr_test_drifts</span><span class="p">])</span>
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">y_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">)</span>
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;y&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">&#39;y&#39;</span><span class="p">],</span> <span class="n">method_drifts</span><span class="p">])</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
<span class="k">return</span> <span class="n">data</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,692 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.protocol &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.protocol</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.protocol</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">exists</span>
<span class="kn">from</span> <span class="nn">glob</span> <span class="kn">import</span> <span class="n">glob</span>
<div class="viewcode-block" id="AbstractProtocol">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol">[docs]</a>
<span class="k">class</span> <span class="nc">AbstractProtocol</span><span class="p">(</span><span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract parent class for sample generation protocols.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the protocol. Yields one sample at a time along with its prevalence</span>
<span class="sd"> :return: yields a tuple `(sample, prev) at a time, where `sample` is a set of instances</span>
<span class="sd"> and in which `prev` is an `nd.array` with the class prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span>
<div class="viewcode-block" id="AbstractProtocol.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates the total number of samples that the protocol generates.</span>
<span class="sd"> :return: The number of samples to generate if known, or `None` otherwise.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">None</span></div>
</div>
<div class="viewcode-block" id="IterateProtocol">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol">[docs]</a>
<span class="k">class</span> <span class="nc">IterateProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A very simple protocol which simply iterates over a list of previously generated samples</span>
<span class="sd"> :param samples: a list of :class:`quapy.data.base.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">samples</span><span class="p">:</span> <span class="p">[</span><span class="n">LabelledCollection</span><span class="p">]):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="n">samples</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Yields one sample from the initial list at a time</span>
<span class="sd"> :return: yields a tuple `(sample, prev) at a time, where `sample` is a set of instances</span>
<span class="sd"> and in which `prev` is an `nd.array` with the class prevalence values</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
<span class="k">yield</span> <span class="n">sample</span><span class="o">.</span><span class="n">Xp</span>
<div class="viewcode-block" id="IterateProtocol.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of samples in this protocol</span>
<span class="sd"> :return: int</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="AbstractStochasticSeededProtocol">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol">[docs]</a>
<span class="k">class</span> <span class="nc">AbstractStochasticSeededProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,</span>
<span class="sd"> via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples.</span>
<span class="sd"> The protocol abstraction enforces</span>
<span class="sd"> the object to be instantiated using a seed, so that the sequence can be fully replicated.</span>
<span class="sd"> In order to make this functionality possible, the classes extending this abstraction need to</span>
<span class="sd"> implement only two functions, :meth:`samples_parameters` which generates all the parameters</span>
<span class="sd"> needed for extracting the samples, and :meth:`sample` that, given some parameters as input,</span>
<span class="sd"> deterministically generates a sample.</span>
<span class="sd"> :param random_state: the seed for allowing to replicate any sequence of samples. Default is 0, meaning that</span>
<span class="sd"> the sequence will be consistent every time the protocol is called.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">_random_state</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># means &quot;not set&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span>
<span class="nd">@random_state</span><span class="o">.</span><span class="n">setter</span>
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.samples_parameters">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This function has to return all the necessary parameters to replicate the samples</span>
<span class="sd"> :return: a list of parameters, each of which serves to deterministically generate a sample</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.sample">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.sample">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Extract one sample determined by the given parameters</span>
<span class="sd"> :param params: all the necessary parameters to generate a sample</span>
<span class="sd"> :return: one sample (the same sample has to be generated for the same parameters)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Yields one sample at a time. The type of object returned depends on the `collator` function. The</span>
<span class="sd"> default behaviour returns tuples of the form `(sample, prevalence)`.</span>
<span class="sd"> :return: a tuple `(sample, prevalence)` if return_type=&#39;sample_prev&#39;, or an instance of</span>
<span class="sd"> :class:`qp.data.LabelledCollection` if return_type=&#39;labelled_collection&#39;</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;The random seed has never been initialized. &#39;</span>
<span class="s1">&#39;Set it to None not to impose replicability.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">))</span>
<span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">():</span>
<span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">collator</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.collator">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.collator">[docs]</a>
<span class="k">def</span> <span class="nf">collator</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The collator prepares the sample to accommodate the desired output format before returning the output.</span>
<span class="sd"> This collator simply returns the sample as it is. Classes inheriting from this abstract class can</span>
<span class="sd"> implement their custom collators.</span>
<span class="sd"> :param sample: the sample to be returned</span>
<span class="sd"> :param args: additional arguments</span>
<span class="sd"> :return: the sample adhering to a desired output format (in this case, the sample is returned as it is)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">sample</span></div>
</div>
<div class="viewcode-block" id="OnLabelledCollectionProtocol">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol">[docs]</a>
<span class="k">class</span> <span class="nc">OnLabelledCollectionProtocol</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Protocols that generate samples from a :class:`qp.data.LabelledCollection` object.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">RETURN_TYPES</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">,</span> <span class="s1">&#39;labelled_collection&#39;</span><span class="p">,</span> <span class="s1">&#39;index&#39;</span><span class="p">]</span>
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_labelled_collection">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection">[docs]</a>
<span class="k">def</span> <span class="nf">get_labelled_collection</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the labelled collection on which this protocol acts.</span>
<span class="sd"> :return: an object of type :class:`qp.data.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span></div>
<div class="viewcode-block" id="OnLabelledCollectionProtocol.on_preclassified_instances">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances">[docs]</a>
<span class="k">def</span> <span class="nf">on_preclassified_instances</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a copy of this protocol that acts on a modified version of the original</span>
<span class="sd"> :class:`qp.data.LabelledCollection` in which the original instances have been replaced</span>
<span class="sd"> with the outputs of a classifier for each instance. (This is convenient for speeding-up</span>
<span class="sd"> the evaluation procedures for many samples, by pre-classifying the instances in advance.)</span>
<span class="sd"> :param pre_classifications: the predictions issued by a classifier, typically an array-like</span>
<span class="sd"> with shape `(n_instances,)` when the classifier is a hard one, or with shape</span>
<span class="sd"> `(n_instances, n_classes)` when the classifier is a probabilistic one.</span>
<span class="sd"> :param in_place: whether or not to apply the modification in-place or in a new copy (default).</span>
<span class="sd"> :return: a copy of this protocol</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;error: the pre-classified data has different shape &#39;</span> \
<span class="sa">f</span><span class="s1">&#39;(expected </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span><span class="si">}</span><span class="s1">)&#39;</span>
<span class="k">if</span> <span class="n">in_place</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">pre_classifications</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">new</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">return</span> <span class="n">new</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_collator">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_collator">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">get_collator</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a collator function, i.e., a function that prepares the yielded data</span>
<span class="sd"> :param return_type: either &#39;sample_prev&#39; (default) if the collator is requested to yield tuples of</span>
<span class="sd"> `(sample, prevalence)`, or &#39;labelled_collection&#39; when it is requested to yield instances of</span>
<span class="sd"> :class:`qp.data.LabelledCollection`</span>
<span class="sd"> :return: the collator function (a callable function that takes as input an instance of</span>
<span class="sd"> :class:`qp.data.LabelledCollection`)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">return_type</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;unknown return type passed as argument; valid ones are </span><span class="si">{</span><span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span><span class="o">.</span><span class="n">Xp</span>
<span class="k">elif</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">&#39;labelled_collection&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span></div>
</div>
<div class="viewcode-block" id="APP">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP">[docs]</a>
<span class="k">class</span> <span class="nc">APP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implementation of the artificial prevalence protocol (APP).</span>
<span class="sd"> The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,</span>
<span class="sd"> [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of</span>
<span class="sd"> prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,</span>
<span class="sd"> [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid</span>
<span class="sd"> combination of prevalence values is indicated by `repeats`.</span>
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
<span class="sd"> :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from</span>
<span class="sd"> qp.environ[&quot;SAMPLE_SIZE&quot;]. If this is not set, a ValueError exception is raised.</span>
<span class="sd"> :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the</span>
<span class="sd"> grid (default is 21)</span>
<span class="sd"> :param repeats: number of copies for each valid prevalence vector (default is 10)</span>
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
<span class="sd"> will be the same every time the protocol is called)</span>
<span class="sd"> :param sanity_check: int, raises an exception warning the user that the number of examples to be generated exceed</span>
<span class="sd"> this number; set to None for skipping this check</span>
<span class="sd"> :param return_type: set to &quot;sample_prev&quot; (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
<span class="sd"> to &quot;labelled_collection&quot; to get instead instances of LabelledCollection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">APP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span> <span class="o">=</span> <span class="n">n_prevalences</span>
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span> <span class="o">=</span> <span class="n">smooth_limits_epsilon</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">((</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sanity_check</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> <span class="n">sanity_check</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;param &quot;sanity_check&quot; must either be None or a positive integer&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">n</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
<span class="k">if</span> <span class="n">n</span> <span class="o">&gt;</span> <span class="n">sanity_check</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Abort: the number of samples that will be generated by </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">n</span><span class="si">}</span><span class="s2">) &quot;</span>
<span class="sa">f</span><span class="s2">&quot;exceeds the maximum number of allowed samples (</span><span class="si">{</span><span class="n">sanity_check</span><span class="w"> </span><span class="si">= }</span><span class="s2">). Set &#39;sanity_check&#39; to &quot;</span>
<span class="sa">f</span><span class="s2">&quot;None, or to a higher number, for bypassing this check.&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
<div class="viewcode-block" id="APP.prevalence_grid">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.prevalence_grid">[docs]</a>
<span class="k">def</span> <span class="nf">prevalence_grid</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates vectors of prevalence values from an exhaustive grid of prevalence values. The</span>
<span class="sd"> number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example,</span>
<span class="sd"> `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only</span>
<span class="sd"> valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each</span>
<span class="sd"> valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be</span>
<span class="sd"> implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained</span>
<span class="sd"> to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to</span>
<span class="sd"> 1). Note that this method is deterministic, i.e., there is no random sampling anywhere.</span>
<span class="sd"> :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape</span>
<span class="sd"> `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found</span>
<span class="sd"> in the grid multiplied by `repeat`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dimensions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_linspace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span><span class="p">)</span>
<span class="n">eps</span> <span class="o">=</span> <span class="p">(</span><span class="n">s</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="mi">2</span> <span class="c1"># handling floating rounding</span>
<span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">dimensions</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">s</span><span class="p">,</span> <span class="n">repeat</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="o">&lt;</span> <span class="p">(</span><span class="mf">1.</span><span class="o">+</span><span class="n">eps</span><span class="p">))]</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">),</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">prevs</span></div>
<div class="viewcode-block" id="APP.samples_parameters">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.samples_parameters">[docs]</a>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the APP protocol.</span>
<span class="sd"> :return: a list of indexes that realize the APP sampling</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence_grid</span><span class="p">():</span>
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="k">return</span> <span class="n">indexes</span></div>
<div class="viewcode-block" id="APP.sample">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.sample">[docs]</a>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Realizes the sample given the index of the instances.</span>
<span class="sd"> :param index: indexes of the instances to select</span>
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
<div class="viewcode-block" id="APP.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of samples that will be generated</span>
<span class="sd"> :return: int</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="NPP">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP">[docs]</a>
<span class="k">class</span> <span class="nc">NPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing</span>
<span class="sd"> samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.</span>
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
<span class="sd"> qp.environ[&quot;SAMPLE_SIZE&quot;]. If this is not set, a ValueError exception is raised.</span>
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
<span class="sd"> will be the same every time the protocol is called)</span>
<span class="sd"> :param return_type: set to &quot;sample_prev&quot; (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
<span class="sd"> to &quot;labelled_collection&quot; to get instead instances of LabelledCollection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">return_type</span><span class="o">=</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">NPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
<div class="viewcode-block" id="NPP.samples_parameters">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.samples_parameters">[docs]</a>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the NPP protocol.</span>
<span class="sd"> :return: a list of indexes that realize the NPP sampling</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">)</span>
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="k">return</span> <span class="n">indexes</span></div>
<div class="viewcode-block" id="NPP.sample">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.sample">[docs]</a>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Realizes the sample given the index of the instances.</span>
<span class="sd"> :param index: indexes of the instances to select</span>
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
<div class="viewcode-block" id="NPP.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of samples that will be generated (equals to &quot;repeats&quot;)</span>
<span class="sd"> :return: int</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div>
</div>
<div class="viewcode-block" id="UPP">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP">[docs]</a>
<span class="k">class</span> <span class="nc">UPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values,</span>
<span class="sd"> relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with</span>
<span class="sd"> k the number of classes. This protocol covers the entire range of prevalence values in a</span>
<span class="sd"> statistical sense, i.e., unlike APP there is no guarantee that it is covered precisely</span>
<span class="sd"> equally for all classes, but it is preferred in cases in which the number of possible</span>
<span class="sd"> combinations of the grid values of APP makes this endeavour intractable.</span>
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
<span class="sd"> qp.environ[&quot;SAMPLE_SIZE&quot;]. If this is not set, a ValueError exception is raised.</span>
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
<span class="sd"> will be the same every time the protocol is called)</span>
<span class="sd"> :param return_type: set to &quot;sample_prev&quot; (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
<span class="sd"> to &quot;labelled_collection&quot; to get instead instances of LabelledCollection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">return_type</span><span class="o">=</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">UPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
<div class="viewcode-block" id="UPP.samples_parameters">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.samples_parameters">[docs]</a>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the UPP protocol.</span>
<span class="sd"> :return: a list of indexes that realize the UPP sampling</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="k">return</span> <span class="n">indexes</span></div>
<div class="viewcode-block" id="UPP.sample">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.sample">[docs]</a>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Realizes the sample given the index of the instances.</span>
<span class="sd"> :param index: indexes of the instances to select</span>
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
<div class="viewcode-block" id="UPP.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of samples that will be generated (equals to &quot;repeats&quot;)</span>
<span class="sd"> :return: int</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div>
</div>
<div class="viewcode-block" id="DomainMixer">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer">[docs]</a>
<span class="k">class</span> <span class="nc">DomainMixer</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.</span>
<span class="sd"> :param domainA: one domain, an object of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> :param domainB: another domain, an object of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
<span class="sd"> qp.environ[&quot;SAMPLE_SIZE&quot;]. If this is not set, a ValueError exception is raised.</span>
<span class="sd"> :param repeats: int, number of samples to draw for every mixture rate</span>
<span class="sd"> :param prevalence: the prevalence to preserv along the mixtures. If specified, should be an array containing</span>
<span class="sd"> one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence</span>
<span class="sd"> will be taken from the domain A (default).</span>
<span class="sd"> :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will</span>
<span class="sd"> generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself.</span>
<span class="sd"> the specific points</span>
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
<span class="sd"> will be the same every time the protocol is called)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">domainA</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
<span class="n">domainB</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
<span class="n">sample_size</span><span class="p">,</span>
<span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">prevalence</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span>
<span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">return_type</span><span class="o">=</span><span class="s1">&#39;sample_prev&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">DomainMixer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">A</span> <span class="o">=</span> <span class="n">domainA</span>
<span class="bp">self</span><span class="o">.</span><span class="n">B</span> <span class="o">=</span> <span class="n">domainB</span>
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="k">if</span> <span class="n">prevalence</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">domainA</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalence</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span> <span class="o">==</span> <span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> \
<span class="sa">f</span><span class="s1">&#39;wrong shape for the vector prevalence (expected </span><span class="si">{</span><span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="si">}</span><span class="s1">)&#39;</span>
<span class="k">assert</span> <span class="n">F</span><span class="o">.</span><span class="n">check_prevalence_vector</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)&#39;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">mixture_points</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="o">&lt;=</span><span class="mi">1</span><span class="p">)),</span> \
<span class="s1">&#39;mixture_model datatype not understood (expected int or a sequence of real values in [0,1])&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
<div class="viewcode-block" id="DomainMixer.samples_parameters">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.samples_parameters">[docs]</a>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the this protocol.</span>
<span class="sd"> :return: a list of zipped indexes (from A and B) that realize the sampling</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">propA</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">:</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
<span class="n">nA</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">*</span> <span class="n">propA</span><span class="p">))</span>
<span class="n">nB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="o">-</span><span class="n">nA</span>
<span class="n">sampleAidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nA</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
<span class="n">sampleBidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nB</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
<span class="n">indexesA</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleAidx</span><span class="p">)</span>
<span class="n">indexesB</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleBidx</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span><span class="p">))</span></div>
<div class="viewcode-block" id="DomainMixer.sample">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.sample">[docs]</a>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">indexes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Realizes the sample given a pair of indexes of the instances from A and B.</span>
<span class="sd"> :param indexes: indexes of the instances to select from A and B</span>
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="n">indexes</span>
<span class="n">sampleA</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesA</span><span class="p">)</span>
<span class="n">sampleB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesB</span><span class="p">)</span>
<span class="k">return</span> <span class="n">sampleA</span><span class="o">+</span><span class="n">sampleB</span></div>
<div class="viewcode-block" id="DomainMixer.total">
<a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.total">[docs]</a>
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of samples that will be generated (equals to &quot;repeats * mixture_points&quot;)</span>
<span class="sd"> :return: int</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">)</span></div>
</div>
<span class="c1"># aliases</span>
<span class="n">ArtificialPrevalenceProtocol</span> <span class="o">=</span> <span class="n">APP</span>
<span class="n">NaturalPrevalenceProtocol</span> <span class="o">=</span> <span class="n">NPP</span>
<span class="n">UniformPrevalenceProtocol</span> <span class="o">=</span> <span class="n">UPP</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,110 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_base &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_base</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_base</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
<div class="viewcode-block" id="test_import">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_base.test_import">[docs]</a>
<span class="k">def</span> <span class="nf">test_import</span><span class="p">():</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">__version__</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,178 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_datasets &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_datasets</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_datasets</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
<span class="kn">from</span> <span class="nn">quapy.data.datasets</span> <span class="kn">import</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span>\
<span class="n">fetch_reviews</span><span class="p">,</span> <span class="n">fetch_twitter</span><span class="p">,</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">,</span> <span class="n">fetch_lequa2022</span><span class="p">,</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span>
<div class="viewcode-block" id="test_fetch_reviews">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_reviews">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Training set stats&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Test set stats&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span></div>
<div class="viewcode-block" id="test_fetch_twitter">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_twitter">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">ve</span><span class="p">:</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;semeval&#39;</span> <span class="ow">and</span> <span class="n">ve</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
<span class="s1">&#39;dataset &quot;semeval&quot; can only be used for model selection.&#39;</span><span class="p">):</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Training set stats&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Test set stats&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="test_fetch_UCIDataset">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIDataset">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_fetch_UCIDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span> <span class="k">as</span> <span class="n">fnfe</span><span class="p">:</span>
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">&#39;pageblocks.5&#39;</span> <span class="ow">and</span> <span class="n">fnfe</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">find</span><span class="p">(</span>
<span class="s1">&#39;If this is the first time you attempt to load this dataset&#39;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Training set stats&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Test set stats&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="test_fetch_UCIMultiDataset">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIMultiDataset">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_fetch_UCIMultiDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Training set stats&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Test set stats&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="test_fetch_lequa2022">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_lequa2022">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="n">train</span><span class="p">,</span> <span class="n">gen_val</span><span class="p">,</span> <span class="n">gen_test</span> <span class="o">=</span> <span class="n">fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">stats</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Val:&#39;</span><span class="p">,</span> <span class="n">gen_val</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Test:&#39;</span><span class="p">,</span> <span class="n">gen_test</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,195 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_evaluation &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_evaluation</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_evaluation</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
<span class="kn">from</span> <span class="nn">quapy.error</span> <span class="kn">import</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">,</span> \
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">PCC</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
<div class="viewcode-block" id="EvalTestCase">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase">[docs]</a>
<span class="k">class</span> <span class="nc">EvalTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="EvalTestCase.test_eval_speedup">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_eval_speedup">[docs]</a>
<span class="k">def</span> <span class="nf">test_eval_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;hp&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="s1">&#39;force&#39;</span><span class="p">)</span>
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;evaluation (with optimization) took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]&#39;</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">NonAggregativeEMQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="n">emq</span> <span class="o">=</span> <span class="n">NonAggregativeEMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">tend_no_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;evaluation (w/o optimization) took </span><span class="si">{</span><span class="n">tend_no_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_no_optim</span><span class="o">&gt;</span><span class="p">(</span><span class="n">tend_optim</span><span class="o">/</span><span class="mi">2</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="EvalTestCase.test_evaluation_output">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_evaluation_output">[docs]</a>
<span class="k">def</span> <span class="nf">test_evaluation_output</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;hp&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span><span class="o">=</span><span class="mi">100</span>
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">PCC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
<span class="n">single_errors</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span><span class="p">)</span>
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;m&#39;</span><span class="o">+</span><span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
<span class="n">single_errors</span> <span class="o">=</span> <span class="n">single_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="n">averaged_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">averaged_errors</span><span class="p">]</span>
<span class="k">for</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">averaged_error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">single_errors</span><span class="p">,</span> <span class="n">averaged_errors</span><span class="p">):</span>
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">averaged_error_metric</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">score</span><span class="p">,</span> <span class="nb">float</span><span class="p">))</span>
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">error_metric</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">scores</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">scores</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">score</span><span class="p">)</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,143 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_hierarchy &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_hierarchy</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_hierarchy</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
<div class="viewcode-block" id="HierarchyTestCase">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase">[docs]</a>
<span class="k">class</span> <span class="nc">HierarchyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="HierarchyTestCase.test_aggregative">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_aggregative">[docs]</a>
<span class="k">def</span> <span class="nf">test_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="HierarchyTestCase.test_binary">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_binary">[docs]</a>
<span class="k">def</span> <span class="nf">test_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">HDy</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="HierarchyTestCase.test_probabilistic">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_probabilistic">[docs]</a>
<span class="k">def</span> <span class="nf">test_probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,176 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_labelcollection &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_labelcollection</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_labelcollection</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">csr_matrix</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<div class="viewcode-block" id="LabelCollectionTestCase">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase">[docs]</a>
<span class="k">class</span> <span class="nc">LabelCollectionTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="LabelCollectionTestCase.test_split">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_split">[docs]</a>
<span class="k">def</span> <span class="nf">test_split</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">100</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">)</span>
<span class="n">tr</span><span class="p">,</span> <span class="n">te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_random</span><span class="p">(</span><span class="mf">0.7</span><span class="p">)</span>
<span class="n">check_prev</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.7</span> <span class="o">+</span> <span class="n">te</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.3</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="p">),</span> <span class="mi">70</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">te</span><span class="p">),</span> <span class="mi">30</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">allclose</span><span class="p">(</span><span class="n">check_prev</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span> <span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="o">+</span><span class="n">te</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span></div>
<div class="viewcode-block" id="LabelCollectionTestCase.test_join">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_join">[docs]</a>
<span class="k">def</span> <span class="nf">test_join</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">50</span><span class="p">)</span>
<span class="n">data1</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">200</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
<span class="n">data2</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
<span class="n">data3</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data1</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data2</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data3</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">all</span><span class="p">(</span><span class="n">combined</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">)),</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
<span class="n">data4</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">,</span> <span class="n">data4</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">20</span><span class="p">)</span>
<span class="n">data5</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
<span class="n">data6</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">,</span> <span class="n">data6</span><span class="p">)</span>
<span class="n">data4</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data4</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
<span class="n">data5</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data5</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,357 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_methods &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_methods</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_methods</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">pytest</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">LinearSVC</span>
<span class="kn">import</span> <span class="nn">method.aggregative</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BinaryQuantifier</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method</span> <span class="kn">import</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span>
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">Ensemble</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">DMy</span>
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
<span class="c1"># datasets = [pytest.param(qp.datasets.fetch_twitter(&#39;hcr&#39;, pickle=True), id=&#39;hcr&#39;),</span>
<span class="c1"># pytest.param(qp.datasets.fetch_UCIDataset(&#39;ionosphere&#39;), id=&#39;ionosphere&#39;)]</span>
<span class="n">tinydatasets</span> <span class="o">=</span> <span class="p">[</span><span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">&#39;hcr&#39;</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;tiny_hcr&#39;</span><span class="p">),</span>
<span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">&#39;ionosphere&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">&#39;tiny_ionosphere&#39;</span><span class="p">)]</span>
<span class="n">learners</span> <span class="o">=</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">,</span> <span class="n">LinearSVC</span><span class="p">]</span>
<div class="viewcode-block" id="test_aggregative_methods">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_aggregative_methods">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset&#39;</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;aggregative_method&#39;</span><span class="p">,</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">)</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;learner&#39;</span><span class="p">,</span> <span class="n">learners</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">aggregative_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">):</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">aggregative_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;skipping the test of binary model </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">model</span><span class="p">)</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
<div class="viewcode-block" id="test_non_aggregative_methods">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_non_aggregative_methods">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset&#39;</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;non_aggregative_method&#39;</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_non_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">non_aggregative_method</span><span class="p">):</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">non_aggregative_method</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
<div class="viewcode-block" id="test_ensemble_method">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_ensemble_method">[docs]</a>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;base_method&#39;</span><span class="p">,</span> <span class="p">[</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ACC</span><span class="p">,</span> <span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">PACC</span><span class="p">])</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;learner&#39;</span><span class="p">,</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">])</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;dataset&#39;</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">&#39;policy&#39;</span><span class="p">,</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">test_ensemble_method</span><span class="p">(</span><span class="n">base_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">policy</span><span class="p">):</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">20</span>
<span class="n">base_quantifier</span><span class="o">=</span><span class="n">base_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span> <span class="ow">and</span> <span class="n">policy</span><span class="o">==</span><span class="s1">&#39;ds&#39;</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;skipping the test of binary policy ds on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">quantifier</span><span class="o">=</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">policy</span><span class="o">=</span><span class="n">policy</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
<div class="viewcode-block" id="test_quanet_method">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_quanet_method">[docs]</a>
<span class="k">def</span> <span class="nf">test_quanet_method</span><span class="p">():</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">quapy.classification.neural</span>
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;skipping QuaNet test due to missing torch package&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
<span class="c1"># load the kindle dataset as text, and convert words to numerical indexes</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;kindle&#39;</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="mi">200</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">CNNnet</span>
<span class="n">cnn</span> <span class="o">=</span> <span class="n">CNNnet</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">NeuralClassifierTrainer</span>
<span class="n">learner</span> <span class="o">=</span> <span class="n">NeuralClassifierTrainer</span><span class="p">(</span><span class="n">cnn</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">&#39;cuda&#39;</span><span class="p">)</span>
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">QuaNet</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">QuaNet</span><span class="p">(</span><span class="n">learner</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">&#39;cuda&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
<div class="viewcode-block" id="test_str_label_names">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_str_label_names">[docs]</a>
<span class="k">def</span> <span class="nf">test_str_label_names</span><span class="p">():</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">CC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;imdb&#39;</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="o">*</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">))</span>
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">int_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">int_estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
<span class="n">dataset_str</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
<span class="p">[</span><span class="s1">&#39;one&#39;</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">&#39;zero&#39;</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">]),</span>
<span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
<span class="p">[</span><span class="s1">&#39;one&#39;</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">&#39;zero&#39;</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">]))</span>
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">&#39;wrong indexation&#39;</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
<span class="n">str_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">str_estim_prevalences</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
<span class="nb">print</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">str_estim_prevalences</span><span class="p">)</span>
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
<span class="n">str_estim_prevalences</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s1">&#39;one&#39;</span><span class="p">)])</span></div>
<span class="c1"># helper</span>
<span class="k">def</span> <span class="nf">__fit_test</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
<span class="n">test_samples</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">test_samples</span><span class="p">)</span>
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">),</span> <span class="n">estim_prevs</span>
<div class="viewcode-block" id="test_median_meta">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta">[docs]</a>
<span class="k">def</span> <span class="nf">test_median_meta</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This test compares the performance of the MedianQuantifier with respect to computing the median of the predictions</span>
<span class="sd"> of a differently parameterized quantifier. We use the DistributionMatching base quantifier and the median is</span>
<span class="sd"> computed across different values of nbins</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
<span class="c1"># grid of values</span>
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">11</span><span class="p">))</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">&#39;kindle&#39;</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">errors</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">nbins</span> <span class="ow">in</span> <span class="n">nbins_grid</span><span class="p">:</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">nbins</span><span class="o">=</span><span class="n">nbins</span><span class="p">)</span>
<span class="n">mae</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
<span class="n">prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
<span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1"> DistributionMatching(nbins=</span><span class="si">{</span><span class="n">nbins</span><span class="si">}</span><span class="s1">) got MAE </span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span>
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">errors</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;nbins&#39;</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">median_mae</span><span class="p">,</span> <span class="n">prev</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">),</span> <span class="n">prev</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">median_mae</span> <span class="o">&lt;</span> <span class="n">mae</span><span class="p">,</span> <span class="s1">&#39;the median-based quantifier provided a higher error...&#39;</span></div>
<div class="viewcode-block" id="test_median_meta_modsel">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta_modsel">[docs]</a>
<span class="k">def</span> <span class="nf">test_median_meta_modsel</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This test checks the median-meta quantifier with model selection</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">&#39;kindle&#39;</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">15</span><span class="p">]</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;nbins&#39;</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">median_mae</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
<span class="n">lr_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)}</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;nbins&#39;</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">lr_params</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">APP</span><span class="p">(</span><span class="n">val</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">optimized_median_ave</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">optimized_median_ave</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">optimized_median_ave</span> <span class="o">&lt;</span> <span class="n">median_mae</span><span class="p">,</span> <span class="s2">&quot;the optimized method yielded worse performance...&quot;</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,225 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_modsel &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_modsel</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_modsel</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">SVC</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
<span class="kn">import</span> <span class="nn">time</span>
<div class="viewcode-block" id="ModselTestCase">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase">[docs]</a>
<span class="k">class</span> <span class="nc">ModselTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="ModselTestCase.test_modsel">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel">[docs]</a>
<span class="k">def</span> <span class="nf">test_modsel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;imdb&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;best params&#39;</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;best score&#39;</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel">[docs]</a>
<span class="k">def</span> <span class="nf">test_modsel_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;imdb&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="c1"># test = data.test</span>
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;best params&#39;</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;best score&#39;</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel_speedup">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel_speedup">[docs]</a>
<span class="k">def</span> <span class="nf">test_modsel_parallel_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;imdb&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">)}</span>
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
<span class="n">GridSearchQ</span><span class="p">(</span>
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="n">tend_nooptim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
<span class="n">GridSearchQ</span><span class="p">(</span>
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;parallel training took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;sequential training took </span><span class="si">{</span><span class="n">tend_nooptim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_optim</span> <span class="o">&lt;</span> <span class="p">(</span><span class="mf">0.5</span><span class="o">*</span><span class="n">tend_nooptim</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="ModselTestCase.test_modsel_timeout">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_timeout">[docs]</a>
<span class="k">def</span> <span class="nf">test_modsel_timeout</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">&#39;imdb&#39;</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="c1"># test = data.test</span>
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;classifier__C&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">TimeoutError</span><span class="p">):</span>
<span class="n">q</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,336 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_protocols &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_protocols</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_protocols</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span><span class="p">,</span> <span class="n">NPP</span><span class="p">,</span> <span class="n">UPP</span><span class="p">,</span> <span class="n">DomainMixer</span><span class="p">,</span> <span class="n">AbstractStochasticSeededProtocol</span>
<div class="viewcode-block" id="mock_labelled_collection">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.mock_labelled_collection">[docs]</a>
<span class="k">def</span> <span class="nf">mock_labelled_collection</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">):</span>
<span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span>
<span class="n">X</span> <span class="o">=</span> <span class="p">[</span><span class="n">prefix</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;-&#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">y</span><span class="p">)]</span>
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span></div>
<div class="viewcode-block" id="samples_to_str">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.samples_to_str">[docs]</a>
<span class="k">def</span> <span class="nf">samples_to_str</span><span class="p">(</span><span class="n">protocol</span><span class="p">):</span>
<span class="n">samples_str</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
<span class="k">for</span> <span class="n">instances</span><span class="p">,</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">protocol</span><span class="p">():</span>
<span class="n">samples_str</span> <span class="o">+=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">instances</span><span class="si">}</span><span class="se">\t</span><span class="si">{</span><span class="n">prev</span><span class="si">}</span><span class="se">\n</span><span class="s1">&#39;</span>
<span class="k">return</span> <span class="n">samples_str</span></div>
<div class="viewcode-block" id="TestProtocols">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols">[docs]</a>
<span class="k">class</span> <span class="nc">TestProtocols</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="TestProtocols.test_app_sanity_check">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_sanity_check">[docs]</a>
<span class="k">def</span> <span class="nf">test_app_sanity_check</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">101</span>
<span class="n">repeats</span> <span class="o">=</span> <span class="mi">10</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">n_combinations</span> <span class="o">=</span> \
<span class="n">quapy</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="n">n_combinations</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_app_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_app_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># &lt;- random_state is by default set to 0</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_app_not_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_not_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_app_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_app_number">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_number">[docs]</a>
<span class="k">def</span> <span class="nf">test_app_number</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="c1"># surprisingly enough, for some n_prevalences the test fails, notwithstanding</span>
<span class="c1"># everything is correct. The problem is that in function APP.prevalence_grid()</span>
<span class="c1"># there is sometimes one rounding error that gets cumulated and</span>
<span class="c1"># surpasses 1.0 (by a very small float value, 0.0000000000002 or sthe like)</span>
<span class="c1"># so these tuples are mistakenly removed... I have tried with np.close, and</span>
<span class="c1"># other workarounds, but eventually happens that there is some negative probability</span>
<span class="c1"># in the sampling function...</span>
<span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
<span class="n">count</span><span class="o">+=</span><span class="mi">1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">count</span><span class="p">,</span> <span class="n">p</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
<div class="viewcode-block" id="TestProtocols.test_npp_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_npp_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># &lt;- random_state is by default set to 0</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_npp_not_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_not_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_npp_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_kraemer_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_kraemer_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span> <span class="c1"># &lt;- random_state is by default set to 0</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_kraemer_not_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_not_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_kraemer_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_covariate_shift_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">&#39;domA&#39;</span><span class="p">)</span>
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">&#39;domB&#39;</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># &lt;- random_state is by default set to 0</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_not_replicate">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_not_replicate">[docs]</a>
<span class="k">def</span> <span class="nf">test_covariate_shift_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">&#39;domA&#39;</span><span class="p">)</span>
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">&#39;domB&#39;</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
<div class="viewcode-block" id="TestProtocols.test_no_seed_init">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_no_seed_init">[docs]</a>
<span class="k">def</span> <span class="nf">test_no_seed_init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">class</span> <span class="nc">NoSeedInit</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="c1"># return a matrix containing sampling indexes in the rows</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> <span class="mi">10</span><span class="o">*</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="n">index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">NoSeedInit</span><span class="p">()</span>
<span class="c1"># this should raise a ValueError, since the class is said to be AbstractStochasticSeededProtocol but the</span>
<span class="c1"># random_seed has never been passed to super(NoSeedInit, self).__init__(random_seed)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">ValueError</span><span class="p">):</span>
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
<span class="k">pass</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;done&#39;</span><span class="p">)</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,225 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.tests.test_replicability &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.tests.test_replicability</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.tests.test_replicability</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<div class="viewcode-block" id="MyTestCase">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase">[docs]</a>
<span class="k">class</span> <span class="nc">MyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
<div class="viewcode-block" id="MyTestCase.test_prediction_replicability">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_prediction_replicability">[docs]</a>
<span class="k">def</span> <span class="nf">test_prediction_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">&#39;yeast&#39;</span><span class="p">)</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
<span class="n">prev</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
<span class="n">str_prev1</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
<span class="n">prev2</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
<span class="n">str_prev2</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">str_prev1</span><span class="p">,</span> <span class="n">str_prev2</span><span class="p">)</span> <span class="c1"># add assertion here</span></div>
<div class="viewcode-block" id="MyTestCase.test_samping_replicability">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_samping_replicability">[docs]</a>
<span class="k">def</span> <span class="nf">test_samping_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">equal_collections</span><span class="p">(</span><span class="n">c1</span><span class="p">,</span> <span class="n">c2</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">Xtr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">Xtr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">ytr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">ytr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="n">value</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="nb">range</span><span class="p">(</span><span class="mi">100</span><span class="p">)))</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="MyTestCase.test_parallel_replicability">
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_parallel_replicability">[docs]</a>
<span class="k">def</span> <span class="nf">test_parallel_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="s1">&#39;dry-bean&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">])</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">prev1</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">prev2</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">prev3</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="n">prev1</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">prev2</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">prev3</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">prev1</span><span class="p">,</span> <span class="n">prev2</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prev3</span><span class="p">)</span></div>
</div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,437 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.util &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=22607128"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.util</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.util</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">contextlib</span>
<span class="kn">import</span> <span class="nn">itertools</span>
<span class="kn">import</span> <span class="nn">multiprocessing</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">pickle</span>
<span class="kn">import</span> <span class="nn">urllib</span>
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
<span class="kn">import</span> <span class="nn">signal</span>
<span class="k">def</span> <span class="nf">_get_parallel_slices</span><span class="p">(</span><span class="n">n_tasks</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
<span class="k">if</span> <span class="n">n_jobs</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()</span>
<span class="n">batch</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">n_tasks</span> <span class="o">/</span> <span class="n">n_jobs</span><span class="p">)</span>
<span class="n">remainder</span> <span class="o">=</span> <span class="n">n_tasks</span> <span class="o">%</span> <span class="n">n_jobs</span>
<span class="k">return</span> <span class="p">[</span><span class="nb">slice</span><span class="p">(</span><span class="n">job</span> <span class="o">*</span> <span class="n">batch</span><span class="p">,</span> <span class="p">(</span><span class="n">job</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">batch</span> <span class="o">+</span> <span class="p">(</span><span class="n">remainder</span> <span class="k">if</span> <span class="n">job</span> <span class="o">==</span> <span class="n">n_jobs</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">else</span> <span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">job</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)]</span>
<div class="viewcode-block" id="map_parallel">
<a class="viewcode-back" href="../../quapy.html#quapy.util.map_parallel">[docs]</a>
<span class="k">def</span> <span class="nf">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then</span>
<span class="sd"> func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function</span>
<span class="sd"> that already works with a list of arguments.</span>
<span class="sd"> :param func: function to be parallelized</span>
<span class="sd"> :param args: array-like of arguments to be passed to the function in different parallel calls</span>
<span class="sd"> :param n_jobs: the number of workers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">args</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="n">slices</span> <span class="o">=</span> <span class="n">_get_parallel_slices</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">),</span> <span class="n">n_jobs</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)(</span>
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">args</span><span class="p">[</span><span class="n">slice_i</span><span class="p">])</span> <span class="k">for</span> <span class="n">slice_i</span> <span class="ow">in</span> <span class="n">slices</span>
<span class="p">)</span>
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="o">.</span><span class="n">from_iterable</span><span class="p">(</span><span class="n">results</span><span class="p">))</span></div>
<div class="viewcode-block" id="parallel">
<a class="viewcode-back" href="../../quapy.html#quapy.util.parallel">[docs]</a>
<span class="k">def</span> <span class="nf">parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">asarray</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">&#39;loky&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A wrapper of multiprocessing:</span>
<span class="sd"> &gt;&gt;&gt; Parallel(n_jobs=n_jobs)(</span>
<span class="sd"> &gt;&gt;&gt; delayed(func)(args_i) for args_i in args</span>
<span class="sd"> &gt;&gt;&gt; )</span>
<span class="sd"> that takes the `quapy.environ` variable as input silently.</span>
<span class="sd"> Seeds the child processes to ensure reproducibility when n_jobs&gt;1.</span>
<span class="sd"> :param func: callable</span>
<span class="sd"> :param args: args of func</span>
<span class="sd"> :param seed: the numeric seed</span>
<span class="sd"> :param asarray: set to True to return a np.ndarray instead of a list</span>
<span class="sd"> :param backend: indicates the backend used for handling parallel works</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">func_dec</span><span class="p">(</span><span class="n">environ</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span> <span class="o">=</span> <span class="n">environ</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;N_JOBS&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="c1">#set a context with a temporal seed to ensure results are reproducibles in parallel</span>
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="n">seed</span><span class="p">))</span>
<span class="k">return</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
<span class="n">out</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="n">backend</span><span class="p">)(</span>
<span class="n">delayed</span><span class="p">(</span><span class="n">func_dec</span><span class="p">)(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">,</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="p">,</span> <span class="n">args_i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">args_i</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">asarray</span><span class="p">:</span>
<span class="n">out</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
<span class="k">return</span> <span class="n">out</span></div>
<div class="viewcode-block" id="temp_seed">
<a class="viewcode-back" href="../../quapy.html#quapy.util.temp_seed">[docs]</a>
<span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
<span class="k">def</span> <span class="nf">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Can be used in a &quot;with&quot; context to set a temporal seed without modifying the outer numpy&#39;s current state. E.g.:</span>
<span class="sd"> &gt;&gt;&gt; with temp_seed(random_seed):</span>
<span class="sd"> &gt;&gt;&gt; pass # do any computation depending on np.random functionality</span>
<span class="sd"> :param random_state: the seed to set within the &quot;with&quot; context</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">state</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">get_state</span><span class="p">()</span>
<span class="c1">#save the seed just in case is needed (for instance for setting the seed to child processes)</span>
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">yield</span>
<span class="k">finally</span><span class="p">:</span>
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></div>
<div class="viewcode-block" id="download_file">
<a class="viewcode-back" href="../../quapy.html#quapy.util.download_file">[docs]</a>
<span class="k">def</span> <span class="nf">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Downloads a file from a url</span>
<span class="sd"> :param url: the url</span>
<span class="sd"> :param archive_filename: destination filename</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">progress</span><span class="p">(</span><span class="n">blocknum</span><span class="p">,</span> <span class="n">bs</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
<span class="n">total_sz_mb</span> <span class="o">=</span> <span class="s1">&#39;</span><span class="si">%.2f</span><span class="s1"> MB&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">size</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
<span class="n">current_sz_mb</span> <span class="o">=</span> <span class="s1">&#39;</span><span class="si">%.2f</span><span class="s1"> MB&#39;</span> <span class="o">%</span> <span class="p">((</span><span class="n">blocknum</span> <span class="o">*</span> <span class="n">bs</span><span class="p">)</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\r</span><span class="s1">downloaded </span><span class="si">%s</span><span class="s1"> / </span><span class="si">%s</span><span class="s1">&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">current_sz_mb</span><span class="p">,</span> <span class="n">total_sz_mb</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Downloading </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">url</span><span class="p">)</span>
<span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlretrieve</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="n">archive_filename</span><span class="p">,</span> <span class="n">reporthook</span><span class="o">=</span><span class="n">progress</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="download_file_if_not_exists">
<a class="viewcode-back" href="../../quapy.html#quapy.util.download_file_if_not_exists">[docs]</a>
<span class="k">def</span> <span class="nf">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Dowloads a function (using :meth:`download_file`) if the file does not exist.</span>
<span class="sd"> :param url: the url</span>
<span class="sd"> :param archive_filename: destination filename</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">):</span>
<span class="k">return</span>
<span class="n">create_if_not_exist</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">))</span>
<span class="n">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">)</span></div>
<div class="viewcode-block" id="create_if_not_exist">
<a class="viewcode-back" href="../../quapy.html#quapy.util.create_if_not_exist">[docs]</a>
<span class="k">def</span> <span class="nf">create_if_not_exist</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> An alias to `os.makedirs(path, exist_ok=True)` that also returns the path. This is useful in cases like, e.g.:</span>
<span class="sd"> &gt;&gt;&gt; path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))</span>
<span class="sd"> :param path: path to create</span>
<span class="sd"> :return: the path itself</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="n">path</span></div>
<div class="viewcode-block" id="get_quapy_home">
<a class="viewcode-back" href="../../quapy.html#quapy.util.get_quapy_home">[docs]</a>
<span class="k">def</span> <span class="nf">get_quapy_home</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as dowloaded datasets.</span>
<span class="sd"> This directory is `~/quapy_data`</span>
<span class="sd"> :return: a string representing the path</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">home</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="o">.</span><span class="n">home</span><span class="p">()),</span> <span class="s1">&#39;quapy_data&#39;</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">home</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="n">home</span></div>
<div class="viewcode-block" id="create_parent_dir">
<a class="viewcode-back" href="../../quapy.html#quapy.util.create_parent_dir">[docs]</a>
<span class="k">def</span> <span class="nf">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates the parent dir (if any) of a given path, if not exists. E.g., for `./path/to/file.txt`, the path `./path/to`</span>
<span class="sd"> is created.</span>
<span class="sd"> :param path: the path</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">parentdir</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span>
<span class="k">if</span> <span class="n">parentdir</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">parentdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
<div class="viewcode-block" id="save_text_file">
<a class="viewcode-back" href="../../quapy.html#quapy.util.save_text_file">[docs]</a>
<span class="k">def</span> <span class="nf">save_text_file</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Saves a text file to disk, given its full path, and creates the parent directory if missing.</span>
<span class="sd"> :param path: path where to save the path.</span>
<span class="sd"> :param text: text to save.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="s1">&#39;wt&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">fout</span><span class="p">:</span>
<span class="n">fout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">text</span><span class="p">)</span></div>
<div class="viewcode-block" id="pickled_resource">
<a class="viewcode-back" href="../../quapy.html#quapy.util.pickled_resource">[docs]</a>
<span class="k">def</span> <span class="nf">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">generation_func</span><span class="p">:</span><span class="n">callable</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times</span>
<span class="sd"> this function is invoked, it loads the pickled resource. Example:</span>
<span class="sd"> &gt;&gt;&gt; def some_array(n): # a mock resource created with one parameter (`n`)</span>
<span class="sd"> &gt;&gt;&gt; return np.random.rand(n)</span>
<span class="sd"> &gt;&gt;&gt; pickled_resource(&#39;./my_array.pkl&#39;, some_array, 10) # the resource does not exist: it is created by calling some_array(10)</span>
<span class="sd"> &gt;&gt;&gt; pickled_resource(&#39;./my_array.pkl&#39;, some_array, 10) # the resource exists; it is loaded from &#39;./my_array.pkl&#39;</span>
<span class="sd"> :param pickle_path: the path where to save (first time) and load (next times) the resource</span>
<span class="sd"> :param generation_func: the function that generates the resource, in case it does not exist in pickle_path</span>
<span class="sd"> :param args: any arg that generation_func uses for generating the resources</span>
<span class="sd"> :return: the resource</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">pickle_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">):</span>
<span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">instance</span> <span class="o">=</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">&#39;wb&#39;</span><span class="p">),</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
<span class="k">return</span> <span class="n">instance</span></div>
<span class="k">def</span> <span class="nf">_check_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">):</span>
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> \
<span class="s1">&#39;error: sample_size set to None, and cannot be resolved from the environment&#39;</span>
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SAMPLE_SIZE&#39;</span><span class="p">]</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sample_size</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> \
<span class="s1">&#39;error: sample_size is not a positive integer&#39;</span>
<span class="k">return</span> <span class="n">sample_size</span>
<div class="viewcode-block" id="EarlyStop">
<a class="viewcode-back" href="../../quapy.html#quapy.util.EarlyStop">[docs]</a>
<span class="k">class</span> <span class="nc">EarlyStop</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A class implementing the early-stopping condition typically used for training neural networks.</span>
<span class="sd"> &gt;&gt;&gt; earlystop = EarlyStop(patience=2, lower_is_better=True)</span>
<span class="sd"> &gt;&gt;&gt; earlystop(0.9, epoch=0)</span>
<span class="sd"> &gt;&gt;&gt; earlystop(0.7, epoch=1)</span>
<span class="sd"> &gt;&gt;&gt; earlystop.IMPROVED # is True</span>
<span class="sd"> &gt;&gt;&gt; earlystop(1.0, epoch=2)</span>
<span class="sd"> &gt;&gt;&gt; earlystop.STOP # is False (patience=1)</span>
<span class="sd"> &gt;&gt;&gt; earlystop(1.0, epoch=3)</span>
<span class="sd"> &gt;&gt;&gt; earlystop.STOP # is True (patience=0)</span>
<span class="sd"> &gt;&gt;&gt; earlystop.best_epoch # is 1</span>
<span class="sd"> &gt;&gt;&gt; earlystop.best_score # is 0.7</span>
<span class="sd"> :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtaind in a</span>
<span class="sd"> held-out validation split) can be found to be worse than the best one obtained so far, before flagging the</span>
<span class="sd"> stopping condition. An instance of this class is `callable`, and is to be used as follows:</span>
<span class="sd"> :param lower_is_better: if True (default) the metric is to be minimized.</span>
<span class="sd"> :ivar best_score: keeps track of the best value seen so far</span>
<span class="sd"> :ivar best_epoch: keeps track of the epoch in which the best score was set</span>
<span class="sd"> :ivar STOP: flag (boolean) indicating the stopping condition</span>
<span class="sd"> :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span> <span class="o">=</span> <span class="n">patience</span>
<span class="bp">self</span><span class="o">.</span><span class="n">better</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">a</span><span class="p">,</span><span class="n">b</span><span class="p">:</span> <span class="n">a</span><span class="o">&lt;</span><span class="n">b</span> <span class="k">if</span> <span class="n">lower_is_better</span> <span class="k">else</span> <span class="n">a</span><span class="o">&gt;</span><span class="n">b</span>
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">watch_score</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Commits the new score found in epoch `epoch`. If the score improves over the best score found so far, then</span>
<span class="sd"> the patiente counter gets reset. If otherwise, the patience counter is decreased, and in case it reachs 0,</span>
<span class="sd"> the flag STOP becomes True.</span>
<span class="sd"> :param watch_score: the new score</span>
<span class="sd"> :param epoch: the current epoch</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">better</span><span class="p">(</span><span class="n">watch_score</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score</span><span class="p">))</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="n">watch_score</span>
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="n">epoch</span>
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">-=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">&lt;=</span> <span class="mi">0</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">True</span></div>
<div class="viewcode-block" id="timeout">
<a class="viewcode-back" href="../../quapy.html#quapy.util.timeout">[docs]</a>
<span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
<span class="k">def</span> <span class="nf">timeout</span><span class="p">(</span><span class="n">seconds</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Opens a context that will launch an exception if not closed after a given number of seconds</span>
<span class="sd"> &gt;&gt;&gt; def func(start_msg, end_msg):</span>
<span class="sd"> &gt;&gt;&gt; print(start_msg)</span>
<span class="sd"> &gt;&gt;&gt; sleep(2)</span>
<span class="sd"> &gt;&gt;&gt; print(end_msg)</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; with timeout(1):</span>
<span class="sd"> &gt;&gt;&gt; func(&#39;begin function&#39;, &#39;end function&#39;)</span>
<span class="sd"> &gt;&gt;&gt; Out[]</span>
<span class="sd"> &gt;&gt;&gt; begin function</span>
<span class="sd"> &gt;&gt;&gt; TimeoutError</span>
<span class="sd"> :param seconds: number of seconds, set to &lt;=0 to ignore the timer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">seconds</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">handler</span><span class="p">(</span><span class="n">signum</span><span class="p">,</span> <span class="n">frame</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TimeoutError</span><span class="p">()</span>
<span class="n">signal</span><span class="o">.</span><span class="n">signal</span><span class="p">(</span><span class="n">signal</span><span class="o">.</span><span class="n">SIGALRM</span><span class="p">,</span> <span class="n">handler</span><span class="p">)</span>
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="n">seconds</span><span class="p">)</span>
<span class="k">yield</span>
<span class="k">if</span> <span class="n">seconds</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,7 +0,0 @@
API
===
.. autosummary::
:toctree: generated
quapy

View File

@ -1,23 +0,0 @@
quapy
=====
.. automodule:: quapy

View File

@ -1,21 +0,0 @@
quapy.benchmarking package
==========================
Submodules
----------
quapy.benchmarking.typical module
---------------------------------
.. automodule:: quapy.benchmarking.typical
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: quapy.benchmarking
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,123 +0,0 @@
/* Compatability shim for jQuery and underscores.js.
*
* Copyright Sphinx contributors
* Released under the two clause BSD licence
*/
/**
* small helper function to urldecode strings
*
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
*/
jQuery.urldecode = function(x) {
if (!x) {
return x
}
return decodeURIComponent(x.replace(/\+/g, ' '));
};
/**
* small helper function to urlencode strings
*/
jQuery.urlencode = encodeURIComponent;
/**
* This function returns the parsed url parameters of the
* current request. Multiple values per key are supported,
* it will always return arrays of strings for the value parts.
*/
jQuery.getQueryParameters = function(s) {
if (typeof s === 'undefined')
s = document.location.search;
var parts = s.substr(s.indexOf('?') + 1).split('&');
var result = {};
for (var i = 0; i < parts.length; i++) {
var tmp = parts[i].split('=', 2);
var key = jQuery.urldecode(tmp[0]);
var value = jQuery.urldecode(tmp[1]);
if (key in result)
result[key].push(value);
else
result[key] = [value];
}
return result;
};
/**
* highlight a given string on a jquery object by wrapping it in
* span elements with the given class name.
*/
jQuery.fn.highlightText = function(text, className) {
function highlight(node, addItems) {
if (node.nodeType === 3) {
var val = node.nodeValue;
var pos = val.toLowerCase().indexOf(text);
if (pos >= 0 &&
!jQuery(node.parentNode).hasClass(className) &&
!jQuery(node.parentNode).hasClass("nohighlight")) {
var span;
var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
if (isInSVG) {
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.className = className;
}
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
node.parentNode.insertBefore(span, node.parentNode.insertBefore(
document.createTextNode(val.substr(pos + text.length)),
node.nextSibling));
node.nodeValue = val.substr(0, pos);
if (isInSVG) {
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
var bbox = node.parentElement.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute('class', className);
addItems.push({
"parent": node.parentNode,
"target": rect});
}
}
}
else if (!jQuery(node).is("button, select, textarea")) {
jQuery.each(node.childNodes, function() {
highlight(this, addItems);
});
}
}
var addItems = [];
var result = this.each(function() {
highlight(this, addItems);
});
for (var i = 0; i < addItems.length; ++i) {
jQuery(addItems[i].parent).before(addItems[i].target);
}
return result;
};
/*
* backward compatibility for jQuery.browser
* This will be supported until firefox bug is fixed.
*/
if (!jQuery.browser) {
jQuery.uaMatch = function(ua) {
ua = ua.toLowerCase();
var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
/(webkit)[ \/]([\w.]+)/.exec(ua) ||
/(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
/(msie) ([\w.]+)/.exec(ua) ||
ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
[];
return {
browser: match[ 1 ] || "",
version: match[ 2 ] || "0"
};
};
jQuery.browser = {};
jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 107 B

View File

@ -1 +0,0 @@
.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 434 KiB

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});

View File

@ -1,4 +0,0 @@
/**
* @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);

View File

@ -1,4 +0,0 @@
/**
* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 120 B

View File

@ -1,154 +0,0 @@
/* Highlighting utilities for Sphinx HTML documentation. */
"use strict";
const SPHINX_HIGHLIGHT_ENABLED = true
/**
* highlight a given string on a node by wrapping it in
* span elements with the given class name.
*/
const _highlight = (node, addItems, text, className) => {
if (node.nodeType === Node.TEXT_NODE) {
const val = node.nodeValue;
const parent = node.parentNode;
const pos = val.toLowerCase().indexOf(text);
if (
pos >= 0 &&
!parent.classList.contains(className) &&
!parent.classList.contains("nohighlight")
) {
let span;
const closestNode = parent.closest("body, svg, foreignObject");
const isInSVG = closestNode && closestNode.matches("svg");
if (isInSVG) {
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.classList.add(className);
}
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
const rest = document.createTextNode(val.substr(pos + text.length));
parent.insertBefore(
span,
parent.insertBefore(
rest,
node.nextSibling
)
);
node.nodeValue = val.substr(0, pos);
/* There may be more occurrences of search term in this node. So call this
* function recursively on the remaining fragment.
*/
_highlight(rest, addItems, text, className);
if (isInSVG) {
const rect = document.createElementNS(
"http://www.w3.org/2000/svg",
"rect"
);
const bbox = parent.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute("class", className);
addItems.push({ parent: parent, target: rect });
}
}
} else if (node.matches && !node.matches("button, select, textarea")) {
node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
}
};
const _highlightText = (thisNode, text, className) => {
let addItems = [];
_highlight(thisNode, addItems, text, className);
addItems.forEach((obj) =>
obj.parent.insertAdjacentElement("beforebegin", obj.target)
);
};
/**
* Small JavaScript module for the documentation.
*/
const SphinxHighlight = {
/**
* highlight the search words provided in localstorage in the text
*/
highlightSearchWords: () => {
if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
// get and clear terms from localstorage
const url = new URL(window.location);
const highlight =
localStorage.getItem("sphinx_highlight_terms")
|| url.searchParams.get("highlight")
|| "";
localStorage.removeItem("sphinx_highlight_terms")
url.searchParams.delete("highlight");
window.history.replaceState({}, "", url);
// get individual terms from highlight string
const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
if (terms.length === 0) return; // nothing to do
// There should never be more than one element matching "div.body"
const divBody = document.querySelectorAll("div.body");
const body = divBody.length ? divBody[0] : document.querySelector("body");
window.setTimeout(() => {
terms.forEach((term) => _highlightText(body, term, "highlighted"));
}, 10);
const searchBox = document.getElementById("searchbox");
if (searchBox === null) return;
searchBox.appendChild(
document
.createRange()
.createContextualFragment(
'<p class="highlight-link">' +
'<a href="javascript:SphinxHighlight.hideSearchWords()">' +
_("Hide Search Matches") +
"</a></p>"
)
);
},
/**
* helper function to hide the search marks again
*/
hideSearchWords: () => {
document
.querySelectorAll("#searchbox .highlight-link")
.forEach((el) => el.remove());
document
.querySelectorAll("span.highlighted")
.forEach((el) => el.classList.remove("highlighted"));
localStorage.removeItem("sphinx_highlight_terms")
},
initEscapeListener: () => {
// only install a listener if it is really needed
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
document.addEventListener("keydown", (event) => {
// bail for input elements
if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
// bail with special keys
if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
SphinxHighlight.hideSearchWords();
event.preventDefault();
}
});
},
};
_ready(() => {
/* Do not call highlightSearchWords() when we are on the search page.
* It will highlight words from the *previous* search query.
*/
if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
SphinxHighlight.initEscapeListener();
});

View File

@ -1,354 +0,0 @@
/*
* sphinxdoc.css_t
* ~~~~~~~~~~~~~~~
*
* Sphinx stylesheet -- sphinxdoc theme. Originally created by
* Armin Ronacher for Werkzeug.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
@import url("basic.css");
/* -- page layout ----------------------------------------------------------- */
body {
font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
'Verdana', sans-serif;
font-size: 14px;
letter-spacing: -0.01em;
line-height: 150%;
text-align: center;
background-color: #BFD1D4;
color: black;
padding: 0;
border: 1px solid #aaa;
margin: 0px 80px 0px 80px;
min-width: 740px;
}
div.document {
background-color: white;
text-align: left;
background-image: url(contents.png);
background-repeat: repeat-x;
}
div.documentwrapper {
float: left;
width: 100%;
}
div.bodywrapper {
margin: 0 calc(230px + 10px) 0 0;
border-right: 1px solid #ccc;
}
div.body {
margin: 0;
padding: 0.5em 20px 20px 20px;
}
div.related {
font-size: 1em;
}
div.related ul {
background-image: url(navigation.png);
height: 2em;
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
}
div.related ul li {
margin: 0;
padding: 0;
height: 2em;
float: left;
}
div.related ul li.right {
float: right;
margin-right: 5px;
}
div.related ul li a {
margin: 0;
padding: 0 5px 0 5px;
line-height: 1.75em;
color: #EE9816;
}
div.related ul li a:hover {
color: #3CA8E7;
}
div.sphinxsidebarwrapper {
padding: 0;
}
div.sphinxsidebar {
padding: 0.5em 15px 15px 0;
width: calc(230px - 20px);
float: right;
font-size: 1em;
text-align: left;
}
div.sphinxsidebar h3, div.sphinxsidebar h4 {
margin: 1em 0 0.5em 0;
font-size: 1em;
padding: 0.1em 0 0.1em 0.5em;
color: white;
border: 1px solid #86989B;
background-color: #AFC1C4;
}
div.sphinxsidebar h3 a {
color: white;
}
div.sphinxsidebar ul {
padding-left: 1.5em;
margin-top: 7px;
padding: 0;
line-height: 130%;
}
div.sphinxsidebar ul ul {
margin-left: 20px;
}
div.footer {
background-color: #E3EFF1;
color: #86989B;
padding: 3px 8px 3px 0;
clear: both;
font-size: 0.8em;
text-align: right;
}
div.footer a {
color: #86989B;
text-decoration: underline;
}
/* -- body styles ----------------------------------------------------------- */
p {
margin: 0.8em 0 0.5em 0;
}
a {
color: #CA7900;
text-decoration: none;
}
a:hover {
color: #2491CF;
}
a:visited {
color: #551A8B;
}
div.body a {
text-decoration: underline;
}
h1 {
margin: 0;
padding: 0.7em 0 0.3em 0;
font-size: 1.5em;
color: #11557C;
}
h2 {
margin: 1.3em 0 0.2em 0;
font-size: 1.35em;
padding: 0;
}
h3 {
margin: 1em 0 -0.3em 0;
font-size: 1.2em;
}
div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a {
color: black!important;
}
h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor {
display: none;
margin: 0 0 0 0.3em;
padding: 0 0.2em 0 0.2em;
color: #aaa!important;
}
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor,
h5:hover a.anchor, h6:hover a.anchor {
display: inline;
}
h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover,
h5 a.anchor:hover, h6 a.anchor:hover {
color: #777;
background-color: #eee;
}
a.headerlink {
color: #c60f0f!important;
font-size: 1em;
margin-left: 6px;
padding: 0 4px 0 4px;
text-decoration: none!important;
}
a.headerlink:hover {
background-color: #ccc;
color: white!important;
}
cite, code, code {
font-family: 'Consolas', 'Deja Vu Sans Mono',
'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
letter-spacing: 0.01em;
}
code {
background-color: #f2f2f2;
border-bottom: 1px solid #ddd;
color: #333;
}
code.descname, code.descclassname, code.xref {
border: 0;
}
hr {
border: 1px solid #abc;
margin: 2em;
}
a code {
border: 0;
color: #CA7900;
}
a code:hover {
color: #2491CF;
}
pre {
font-family: 'Consolas', 'Deja Vu Sans Mono',
'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
letter-spacing: 0.015em;
line-height: 120%;
padding: 0.5em;
border: 1px solid #ccc;
}
pre a {
color: inherit;
text-decoration: underline;
}
td.linenos pre {
padding: 0.5em 0;
}
div.quotebar {
background-color: #f8f8f8;
max-width: 250px;
float: right;
padding: 2px 7px;
border: 1px solid #ccc;
}
nav.contents,
aside.topic,
div.topic {
background-color: #f8f8f8;
}
table {
border-collapse: collapse;
margin: 0 -0.5em 0 -0.5em;
}
table td, table th {
padding: 0.2em 0.5em 0.2em 0.5em;
}
div.admonition, div.warning {
font-size: 0.9em;
margin: 1em 0 1em 0;
border: 1px solid #86989B;
background-color: #f7f7f7;
padding: 0;
}
div.admonition p, div.warning p {
margin: 0.5em 1em 0.5em 1em;
padding: 0;
}
div.admonition pre, div.warning pre {
margin: 0.4em 1em 0.4em 1em;
}
div.admonition p.admonition-title,
div.warning p.admonition-title {
margin: 0;
padding: 0.1em 0 0.1em 0.5em;
color: white;
border-bottom: 1px solid #86989B;
font-weight: bold;
background-color: #AFC1C4;
}
div.warning {
border: 1px solid #940000;
}
div.warning p.admonition-title {
background-color: #CF0000;
border-bottom-color: #940000;
}
div.admonition ul, div.admonition ol,
div.warning ul, div.warning ol {
margin: 0.1em 0.5em 0.5em 3em;
padding: 0;
}
div.versioninfo {
margin: 1em 0 0 0;
border: 1px solid #ccc;
background-color: #DDEAF0;
padding: 8px;
line-height: 1.3em;
font-size: 0.9em;
}
.viewcode-back {
font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
'Verdana', sans-serif;
}
div.viewcode-block:target {
background-color: #f4debf;
border-top: 1px solid #ac9;
border-bottom: 1px solid #ac9;
}
div.code-block-caption {
background-color: #ddd;
color: #222;
border: 1px solid #ccc;
}

View File

@ -1,113 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>API &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=22607128"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<!-- Local TOC -->
<div class="local-toc"><ul>
<li><a class="reference internal" href="#">API</a></li>
</ul>
</div>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">API</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/api.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="api">
<h1>API<a class="headerlink" href="#api" title="Link to this heading"></a></h1>
<table class="autosummary longtable docutils align-default">
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="generated/quapy.html#module-quapy" title="quapy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">quapy</span></code></a></p></td>
<td><p>QuaPy module for quantification</p></td>
</tr>
</tbody>
</table>
</section>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,106 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=22607128"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">quapy</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/generated/quapy.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-quapy">
<span id="quapy"></span><h1>quapy<a class="headerlink" href="#module-quapy" title="Link to this heading"></a></h1>
<p>QuaPy module for quantification</p>
</section>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,119 +0,0 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.benchmarking package &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=22607128"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">quapy.benchmarking package</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/quapy.benchmarking.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="quapy-benchmarking-package">
<h1>quapy.benchmarking package<a class="headerlink" href="#quapy-benchmarking-package" title="Link to this heading"></a></h1>
<section id="submodules">
<h2>Submodules<a class="headerlink" href="#submodules" title="Link to this heading"></a></h2>
</section>
<section id="module-quapy.benchmarking.typical">
<span id="quapy-benchmarking-typical-module"></span><h2>quapy.benchmarking.typical module<a class="headerlink" href="#module-quapy.benchmarking.typical" title="Link to this heading"></a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.benchmarking.typical.wrap_cls_params">
<span class="sig-prename descclassname"><span class="pre">quapy.benchmarking.typical.</span></span><span class="sig-name descname"><span class="pre">wrap_cls_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">params</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.benchmarking.typical.wrap_cls_params" title="Link to this definition"></a></dt>
<dd></dd></dl>
</section>
<section id="module-quapy.benchmarking">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-quapy.benchmarking" title="Link to this heading"></a></h2>
</section>
</section>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@ -1,10 +0,0 @@
Para meter los módulos dentro de doc hay que hacer un
sphinx-apidoc -o docs/source/ quapy/ -P
Eso importa todo lo que haya en quapy/ (incluidos los ficheros _ gracias a -P) en source y crea un rst para cada uno.
Parece que lo del -P no funciona. Hay que meterlos a mano en quapy.method.rst
Luego, simplemente
make html

View File

@ -1,35 +0,0 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

View File

@ -1,55 +0,0 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
import pathlib
import sys
from os.path import join
quapy_path = join(pathlib.Path(__file__).parents[2].resolve().as_posix(), 'quapy')
print(f'quapy path={quapy_path}')
sys.path.insert(0, quapy_path)
project = 'QuaPy: A Python-based open-source framework for quantification'
copyright = '2024, Alejandro Moreo'
author = 'Alejandro Moreo'
import quapy
release = quapy.__version__
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
'sphinx.ext.duration',
'sphinx.ext.doctest',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.viewcode',
'sphinx.ext.napoleon'
]
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = 'sphinx_rtd_theme'
# html_theme = 'furo'
# need to be installed: pip install furo (not working...)
html_static_path = ['_static']

View File

@ -1,41 +0,0 @@
.. QuaPy: A Python-based open-source framework for quantification documentation master file, created by
sphinx-quickstart on Wed Feb 7 16:26:46 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to QuaPy's documentation!
==========================================================================================
QuaPy is a Python-based open-source framework for quantification.
This document contains the API of the modules included in QuaPy.
Installation
------------
`pip install quapy`
GitHub
------------
QuaPy is hosted in GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_
.. toctree::
:maxdepth: 2
:caption: Contents:
Contents
--------
.. toctree::
modules
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,7 +0,0 @@
quapy
=====
.. toctree::
:maxdepth: 4
quapy

View File

@ -1,45 +0,0 @@
quapy.classification package
============================
Submodules
----------
quapy.classification.calibration module
---------------------------------------
.. automodule:: quapy.classification.calibration
:members:
:undoc-members:
:show-inheritance:
quapy.classification.methods module
-----------------------------------
.. automodule:: quapy.classification.methods
:members:
:undoc-members:
:show-inheritance:
quapy.classification.neural module
----------------------------------
.. automodule:: quapy.classification.neural
:members:
:undoc-members:
:show-inheritance:
quapy.classification.svmperf module
-----------------------------------
.. automodule:: quapy.classification.svmperf
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: quapy.classification
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,46 +0,0 @@
quapy.data package
==================
Submodules
----------
quapy.data.base module
----------------------
.. automodule:: quapy.data.base
:members:
:undoc-members:
:show-inheritance:
quapy.data.datasets module
--------------------------
.. automodule:: quapy.data.datasets
:members:
:undoc-members:
:show-inheritance:
quapy.data.preprocessing module
-------------------------------
.. automodule:: quapy.data.preprocessing
:members:
:undoc-members:
:show-inheritance:
quapy.data.reader module
------------------------
.. automodule:: quapy.data.reader
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: quapy.data
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,61 +0,0 @@
quapy.method package
====================
Submodules
----------
quapy.method.aggregative module
-------------------------------
.. automodule:: quapy.method.aggregative
:members:
:undoc-members:
:show-inheritance:
.. automodule:: quapy.method._kdey
:members:
:undoc-members:
:show-inheritance:
.. automodule:: quapy.method._neural
:members:
:undoc-members:
:show-inheritance:
.. automodule:: quapy.method._threshold_optim
:members:
:undoc-members:
:show-inheritance:
quapy.method.base module
------------------------
.. automodule:: quapy.method.base
:members:
:undoc-members:
:show-inheritance:
quapy.method.meta module
------------------------
.. automodule:: quapy.method.meta
:members:
:undoc-members:
:show-inheritance:
quapy.method.non\_aggregative module
------------------------------------
.. automodule:: quapy.method.non_aggregative
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: quapy.method
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,80 +0,0 @@
quapy package
=============
Subpackages
-----------
.. toctree::
:maxdepth: 4
quapy.classification
quapy.data
quapy.method
Submodules
----------
quapy.error module
------------------
.. automodule:: quapy.error
:members:
:undoc-members:
:show-inheritance:
quapy.evaluation module
-----------------------
.. automodule:: quapy.evaluation
:members:
:undoc-members:
:show-inheritance:
quapy.functional module
-----------------------
.. automodule:: quapy.functional
:members:
:undoc-members:
:show-inheritance:
quapy.model\_selection module
-----------------------------
.. automodule:: quapy.model_selection
:members:
:undoc-members:
:show-inheritance:
quapy.plot module
-----------------
.. automodule:: quapy.plot
:members:
:undoc-members:
:show-inheritance:
quapy.protocol module
---------------------
.. automodule:: quapy.protocol
:members:
:undoc-members:
:show-inheritance:
quapy.util module
-----------------
.. automodule:: quapy.util
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: quapy
:members:
:undoc-members:
:show-inheritance:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

View File

@ -44,6 +44,28 @@ def acce(y_true, y_pred):
"""
return 1. - (y_true == y_pred).mean()
def bias_binary(prevs, prevs_hat):
"""
Computes the (positive) bias in a binary problem. The bias is simply the difference between the
predicted positive value and the true positive value, so that a positive such value indicates the
prediction has positive bias (i.e., it tends to overestimate) the true value, and negative otherwise.
:math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`,
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
prevalence values
:return: binary bias
"""
assert prevs.shape[-1] == 2 and prevs.shape[-1] == 2, f'bias_binary can only be applied to binary problems'
return prevs_hat[...,1]-prevs[...,1]
def mean_bias_binary(prevs, prevs_hat):
"""
Computes the mean of the (positive) bias in a binary problem.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: mean binary bias
"""
return np.mean(bias_binary(prevs, prevs_hat))
def mae(prevs, prevs_hat):
"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
@ -308,8 +330,8 @@ def __check_eps(eps=None):
CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}

View File

@ -1,6 +1,6 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
from matplotlib.pyplot import get_cmap
import numpy as np
from matplotlib import cm
from scipy.stats import ttest_ind_from_stats
@ -9,6 +9,7 @@ import math
import quapy as qp
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 200
plt.rcParams['font.size'] = 18
@ -212,13 +213,19 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
n_bins=20, error_name='ae', show_std=False,
n_bins=20,
y_error_name='ae',
x_error_name='ae',
show_std=False,
show_density=True,
show_legend=True,
y_axis_title=None,
x_axis_title=None,
logscale=False,
title=f'Quantification error as a function of distribution shift',
title=None,
vlines=None,
method_order=None,
bbox_to_anchor=(1,1),
savepath=None):
"""
Plots the error (along the x-axis, as measured in terms of `error_name`) as a function of the train-test shift
@ -234,7 +241,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
for each experiment
:param tr_prevs: training prevalence of each experiment
:param n_bins: number of bins in which the y-axis is to be divided (default is 20)
:param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
:param y_error_name: a string representing the name of an error function (as defined in `quapy.error`,
default is "ae") to be used along the y-axis
:param x_error_name: a string representing the name of an error function (as defined in `quapy.error`,
default is "ae") to be used along the x-axis
:param show_std: whether or not to show standard deviations as color bands (default is False)
:param show_density: whether or not to display the distribution of experiments for each bin (default is True)
:param show_density: whether or not to display the legend of the chart (default is True)
@ -250,31 +260,40 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
fig, ax = plt.subplots()
ax.grid()
x_error = qp.error.ae
y_error = getattr(qp.error, error_name)
if isinstance(x_error_name, str):
x_error = qp.error.from_name(x_error_name)
if isinstance(y_error_name, str):
y_error = qp.error.from_name(y_error_name)
# get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
# order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to
# x_error function) and 'y' is the estim-test shift (computed as according to y_error)
data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
min_x = np.min([np.min(m_data['x']) for m_data in data.values()])
max_x = np.max([np.max(m_data['x']) for m_data in data.values()])
min_y = np.min([np.min(m_data['y']) for m_data in data.values()])
max_y = np.max([np.max(m_data['y']) for m_data in data.values()])
print(f'[{min_x}, {max_x}]<-')
if method_order is None:
method_order = method_names
_set_colors(ax, n_methods=len(method_order))
bins = np.linspace(0, 1, n_bins+1)
binwidth = 1 / n_bins
min_x, max_x, min_y, max_y = None, None, None, None
bins = np.linspace(min_x-1E-8, max_x+1E-8, n_bins+1)
print('bins', bins)
binwidth = (max_x-min_x) / n_bins
npoints = np.zeros(len(bins), dtype=float)
for method in method_order:
tr_test_drifts = data[method]['x']
method_drifts = data[method]['y']
if logscale:
ax.set_yscale("log")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.yaxis.get_major_formatter().set_scientific(False)
ax.minorticks_off()
# ax.yaxis.set_major_formatter(ScalarFormatter())
# ax.yaxis.get_major_formatter().set_scientific(False)
# ax.minorticks_off()
inds = np.digitize(tr_test_drifts, bins, right=True)
@ -282,7 +301,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
for p,ind in enumerate(range(len(bins))):
selected = inds==ind
if selected.sum() > 0:
xs.append(ind*binwidth-binwidth/2)
xs.append(min_x + (ind*binwidth-binwidth/2))
ys.append(np.mean(method_drifts[selected]))
ystds.append(np.std(method_drifts[selected]))
npoints[p] += len(method_drifts[selected])
@ -291,13 +310,6 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
ys = np.asarray(ys)
ystds = np.asarray(ystds)
min_x_method, max_x_method, min_y_method, max_y_method = xs.min(), xs.max(), ys.min(), ys.max()
min_x = min_x_method if min_x is None or min_x_method < min_x else min_x
max_x = max_x_method if max_x is None or max_x_method > max_x else max_x
max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
min_y = min_y_method if min_y is None or min_y_method < min_y else min_y
max_y = max_y_method if max_y is None or max_y_method > max_y else max_y
ax.errorbar(xs, ys, fmt='-', marker='o', color='w', markersize=8, linewidth=4, zorder=1)
ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=6, linewidth=2, zorder=2)
@ -307,32 +319,41 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
if show_density:
ax2 = ax.twinx()
densities = npoints/np.sum(npoints)
ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
densities, alpha=0.15, color='g', width=binwidth, label='density')
ax2.bar([min_x + (ind * binwidth-binwidth/2) for ind in range(len(bins))],
densities, alpha=0.15, color='g', width=binwidth, label='density')
ax2.set_ylim(0,max(densities))
ax2.spines['right'].set_color('g')
ax2.tick_params(axis='y', colors='g')
ax.set(xlabel=f'Distribution shift between training set and test sample',
ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
title=title)
y_axis_err_name = y_error_name.upper()
if logscale:
y_axis_err_name = f'log({y_axis_err_name})'
if y_axis_title is None:
y_axis_title=f'{y_axis_err_name} (true distribution, predicted distribution)'
if x_axis_title is None:
x_axis_title = f'PPS between training set and test sample (in terms of {x_error_name.upper()})'
ax.set(xlabel=x_axis_title, ylabel=y_axis_title, title=title)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
if vlines:
for vline in vlines:
ax.axvline(vline, 0, 1, linestyle='--', color='k')
ax.set_xlim(min_x, max_x)
margin = (max_x-min_x)*0.02
ax.set_xlim(min_x-margin, max_x+margin)
if logscale:
#nice scale for the logaritmic axis
ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
if show_legend:
# fig.legend(loc='lower center',
# bbox_to_anchor=(1, 0.5),
# ncol=(len(method_names)+1)//2)
fig.legend(loc='lower center',
bbox_to_anchor=(1, 0.5),
ncol=(len(method_names)+1)//2)
bbox_to_anchor=bbox_to_anchor,
ncol=1)
_save_or_show(savepath)
@ -547,6 +568,7 @@ def _save_or_show(savepath):
plt.savefig(savepath, bbox_inches='tight')
else:
plt.show()
plt.close()
def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order):
@ -567,4 +589,84 @@ def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error
if method not in method_order:
method_order.append(method)
return data
return data
def bin_signal(x, y, n_bins, binning_type='isometric'):
"""
Bins the input data `x` and computes statistical summaries of `y` within each bin.
:param x: The independent variable to be binned. Must be a 1D array of numerical values.
:type x: array-like
:param y: The dependent variable corresponding to `x`. Must be the same length as `x`.
:type y: array-like
:param n_bins: The number of bins to create.
:type n_bins: int
:param binning_type: The method to use for binning:
- `'isometric'`: Creates bins with equal width (isometric binning).
- `'isotonic'`: Creates bins containing an equal number of instances (isotonic binning).
Defaults to `'isometric'`.
:type binning_type: str
:return: A tuple containing:
- `bin_means` (numpy.ndarray): The mean of `y` values in each bin (`np.nan` for empty bins).
- `bin_stds` (numpy.ndarray): The standard deviation (sample std) of `y` values in each bin (`np.nan` for empty bins).
- `bin_centers` (numpy.ndarray): The center points of each bin.
- `bin_lengths` (numpy.ndarray): The length (width) of each bin.
- `bin_counts` (numpy.ndarray): The number of elements in each bin.
:rtype: tuple (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
:raises ValueError: If `binning_type` is not one of `'isometric'` or `'isotonic'`.
.. note::
- For isometric binning, bins are equally spaced along the range of `x`.
- For isotonic binning, bins are constructed to contain approximately equal numbers of elements, based on sorted `x`.
- If a bin is empty (no elements fall within its range), its mean and standard deviation will be `np.nan`.
:example:
>>> import numpy as np
>>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> y = np.array([10, 20, 15, 25, 30, 35, 45, 50, 40])
>>> n_bins = 3
>>> bin_signal(x, y, n_bins, binning_type='isometric')
(array([15., 30., 45.]),
array([5., 5., 5.]),
array([2.33333333, 5. , 7.66666667]),
array([2.66666667, 2.66666667, 2.66666667]),
array([3, 3, 3]))
"""
x = np.asarray(x)
y = np.asarray(y)
if binning_type == 'isometric':
# all bins are equally-sized
bin_edges = np.linspace(x.min(), x.max(), n_bins + 1)
elif binning_type == 'isotonic':
# all bins contain the same number of instances
sorted_x = np.sort(x)
bin_edges = np.interp(np.linspace(0, len(x), n_bins + 1), np.arange(len(x)), sorted_x)
else:
raise ValueError("valid binning types include 'isometric' and 'isotonic'")
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_lengths = bin_edges[1:] - bin_edges[:-1]
bin_means = []
bin_stds = []
bin_counts = []
for start, end in zip(bin_edges[:-1], bin_edges[1:]):
mask = (x >= start) & (x < end) if end != bin_edges[-1] else (x >= start) & (x <= end)
count = mask.sum()
if count > 0:
y_in_bin = y[mask]
bin_means.append(np.mean(y_in_bin))
bin_stds.append(np.std(y_in_bin, ddof=1))
else:
bin_means.append(np.nan)
bin_stds.append(np.nan)
bin_counts.append(count)
return np.array(bin_means), np.array(bin_stds), np.array(bin_centers), np.array(bin_lengths), np.array(bin_counts)