update trailing char

This commit is contained in:
parent dd581f7937
commit f346005515

@@ -1,20 +1,20 @@
*.code-workspace
quavenv/*
*.pdf

__pycache__/*
baselines/__pycache__/*
baselines/densratio/__pycache__/*
quacc/__pycache__/*
quacc/evaluation/__pycache__/*
quacc/method/__pycache__/*
tests/__pycache__/*

*.coverage
.coverage

scp_sync.py

out/*
output/*
!output/main/
@@ -1,25 +1,25 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "main",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main.py",
            "console": "integratedTerminal",
            "justMyCode": true
        },
        {
            "name": "main_test",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main_test.py",
            "console": "integratedTerminal",
            "justMyCode": false
        },
    ]
}
@@ -1,54 +1,54 @@
{
    "todo": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:33:36.069Z",
            "id": "2",
            "references": [],
            "title": "Create avg plot with training prevalence on the x axis, averaged over test prevalence"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:32:37.610Z",
            "id": "1",
            "references": [],
            "title": "Test on imdb"
        }
    ],
    "in-progress": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:23.217Z",
            "id": "3",
            "references": [],
            "title": "Implement grid search for the specific task starting from GridSearchQ"
        },
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:34:46.226Z",
            "id": "4",
            "references": [],
            "title": "Add estimators based on PACC (quantifier)"
        }
    ],
    "testing": [],
    "done": [
        {
            "assignedTo": {
                "name": "Lorenzo Volpi"
            },
            "creation_time": "2023-10-28T14:35:12.683Z",
            "id": "5",
            "references": [],
            "title": "Rework report data representation"
        }
    ]
}
TODO.html
@@ -1,143 +1,143 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title></title>
    <style>
        /* From extension vscode.github */
        /*---------------------------------------------------------------------------------------------
        * Copyright (c) Microsoft Corporation. All rights reserved.
        * Licensed under the MIT License. See License.txt in the project root for license information.
        *--------------------------------------------------------------------------------------------*/

        .vscode-dark img[src$=\#gh-light-mode-only],
        .vscode-light img[src$=\#gh-dark-mode-only] {
            display: none;
        }
    </style>

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif;
            font-size: 14px;
            line-height: 1.6;
        }
    </style>
    <style>
        .task-list-item {
            list-style-type: none;
        }

        .task-list-item-checkbox {
            margin-left: -20px;
            vertical-align: middle;
            pointer-events: none;
        }
    </style>

</head>
<body class="vscode-body vscode-light">
    <ul class="contains-task-list">
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add table averages</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> plots; 3 types (notes + email + garg)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix kfcv baseline</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add method with CC besides SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> vary the recalibration parameter in SLD</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix diagonal plot</p>
            <ul>
                <li>seaborn example gallery</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> recalib variants: bcts, SLD (try exact_train_prev=False)</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> check what validation size garg uses</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> for model selection, test the classifier's C parameter, explored over np.logspace(-3, 3, 7) or np.logspace(-4, 4, 9); the class_weight parameter is explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as error and UPP as protocol</p>
            <ul>
                <li>qp.train_test_split to obtain v_train and v_val</li>
                <li>GridSearchQ(
                    model: BaseQuantifier,
                    param_grid: {
                        'classifier__C': np.logspace(-3,3,7),
                        'classifier__class_weight': [None, 'balanced'],
                        'recalib': [None, 'bcts']
                    },
                    protocol: UPP(V_val, repeats=1000),
                    error = qp.error.mae,
                    refit=True,
                    timeout=-1,
                    n_jobs=-2,
                    verbose=True).fit(V_tr)</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method); non-optimized and optimized results</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> save the best score obtained from each GridSearchQ run</p>
            <ul>
                <li>in the binary case, average the two best scores</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> import baselines</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> import mandoline</p>
            <ul>
                <li>mandoline can be imported, but it requires an a priori slicing of the features that must be implemented ad hoc</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> fix old iw baselines</p>
            <ul>
                <li>they cannot be fixed because they depend on numpy</li>
            </ul>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> avg plot with train prevalence on the x axis, averaged over test prevalence</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> implement grid search for the specific task starting from GridSearchQ</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> try PACC as quantifier</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> add labels in shift plot</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix exact_train in quapy</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" checked="" type="checkbox"> also test on imdb</p>
        </li>
        <li class="task-list-item enabled">
            <p><input class="task-list-item-checkbox" type="checkbox"> review new baselines</p>
        </li>
    </ul>

</body>
</html>
TODO.md
@@ -1,44 +1,44 @@
- [x] add table averages
- [x] plots; 3 types (notes + email + garg)
- [x] fix kfcv baseline
- [x] add method with CC besides SLD
- [x] take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
- [x] vary the recalibration parameter in SLD


- [x] fix diagonal plot
  - seaborn example gallery
- [x] recalib variants: bcts, SLD (try exact_train_prev=False)
- [x] check what validation size garg uses
- [x] for model selection, test the classifier's C parameter, explored over np.logspace(-3, 3, 7) or np.logspace(-4, 4, 9); the class_weight parameter is explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as error and UPP as protocol (see the runnable sketch after this list)
  - qp.train_test_split to obtain v_train and v_val
  - GridSearchQ(
        model: BaseQuantifier,
        param_grid: {
            'classifier__C': np.logspace(-3,3,7),
            'classifier__class_weight': [None, 'balanced'],
            'recalib': [None, 'bcts']
        },
        protocol: UPP(V_val, repeats=1000),
        error = qp.error.mae,
        refit=True,
        timeout=-1,
        n_jobs=-2,
        verbose=True).fit(V_tr)
- [x] collective plot, with shift on the x axis, taking all training sets into account and averaging over the 9 cases (each line is a method); non-optimized and optimized results
- [x] save the best score obtained from each GridSearchQ run
  - in the binary case, average the two best scores
- [x] import baselines

- [ ] import mandoline
  - mandoline can be imported, but it requires an a priori slicing of the features that must be implemented ad hoc
- [ ] fix old iw baselines
  - they cannot be fixed because they depend on numpy
- [x] avg plot with train prevalence on the x axis, averaged over test prevalence
- [x] implement grid search for the specific task starting from GridSearchQ
- [x] try PACC as quantifier
- [x] add labels in shift plot
- [x] fix exact_train in quapy
- [x] also test on imdb

- [ ] review new baselines
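For reference, a runnable rendering of the GridSearchQ sketch above (a hedged illustration, not project code): it assumes quapy >= 0.1.7 (GridSearchQ, the UPP protocol, and EMQ/SLD exposing a recalib hyperparameter) and substitutes a synthetic LabelledCollection for the real training data.

import numpy as np
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import EMQ
from quapy.protocol import UPP
from sklearn.linear_model import LogisticRegression

qp.environ["SAMPLE_SIZE"] = 100  # sample size drawn by the UPP protocol

# Synthetic stand-in for the real training collection.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
train = LabelledCollection(X, (X[:, 0] > 0).astype(int))
V_tr, V_val = train.split_stratified(train_prop=0.6)

search = qp.model_selection.GridSearchQ(
    model=EMQ(LogisticRegression()),
    param_grid={
        "classifier__C": np.logspace(-3, 3, 7),
        "classifier__class_weight": [None, "balanced"],
        "recalib": [None, "bcts"],
    },
    protocol=UPP(V_val, repeats=1000),
    error=qp.error.mae,
    refit=True,
    timeout=-1,
    n_jobs=-2,
    verbose=True,
).fit(V_tr)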
@@ -1,44 +1,44 @@
import numpy as np
from sklearn.metrics import f1_score


def get_entropy(probs):
    # Note: as written this is the negative entropy (no leading minus),
    # so higher values correspond to more confident predictions.
    return np.sum(np.multiply(probs, np.log(probs + 1e-20)), axis=1)


def get_max_conf(probs):
    return np.max(probs, axis=-1)


def find_ATC_threshold(scores, labels):
    sorted_idx = np.argsort(scores)

    sorted_scores = scores[sorted_idx]
    sorted_labels = labels[sorted_idx]

    fp = np.sum(labels == 0)
    fn = 0.0

    min_fp_fn = np.abs(fp - fn)
    thres = 0.0
    for i in range(len(labels)):
        if sorted_labels[i] == 0:
            fp -= 1
        else:
            fn += 1

        if np.abs(fp - fn) < min_fp_fn:
            min_fp_fn = np.abs(fp - fn)
            thres = sorted_scores[i]

    return min_fp_fn, thres


def get_ATC_acc(thres, scores):
    return np.mean(scores >= thres)


def get_ATC_f1(thres, scores, probs):
    preds = np.argmax(probs, axis=-1)
    estim_y = np.abs(1 - (scores >= thres) ^ preds)
    return f1_score(estim_y, preds)
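A minimal usage sketch for the ATC helpers above (an illustration, not part of the commit): the threshold is fit on validation correctness and then applied to the scores of an unlabeled test set; the random arrays are hypothetical stand-ins for real posteriors and labels.

import numpy as np

rng = np.random.default_rng(0)
val_probs = rng.dirichlet([2, 2], size=500)    # stand-in validation posteriors
test_probs = rng.dirichlet([3, 2], size=500)   # stand-in (shifted) test posteriors
val_labels = rng.integers(0, 2, size=500)

val_preds = np.argmax(val_probs, axis=-1)
val_scores = get_max_conf(val_probs)           # get_entropy(val_probs) also works

# Fit the threshold on validation correctness, then estimate test accuracy.
_, thres = find_ATC_threshold(val_scores, (val_preds == val_labels).astype(int))
estim_test_acc = get_ATC_acc(thres, get_max_conf(test_probs))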
@@ -1,277 +1,277 @@
"""
|
||||
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
|
||||
References:
|
||||
'Change-point detection in time-series data by relative density-ratio estimation'
|
||||
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
|
||||
Neural Networks 43 (2013) 72-83.
|
||||
|
||||
'A Least-squares Approach to Direct Importance Estimation'
|
||||
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
|
||||
Journal of Machine Learning Research 10 (2009) 1391-1445.
|
||||
"""
|
||||
|
||||
from warnings import warn
|
||||
|
||||
from numpy import (
|
||||
array,
|
||||
asarray,
|
||||
asmatrix,
|
||||
diag,
|
||||
diagflat,
|
||||
empty,
|
||||
exp,
|
||||
inf,
|
||||
log,
|
||||
matrix,
|
||||
multiply,
|
||||
ones,
|
||||
power,
|
||||
sum,
|
||||
)
|
||||
from numpy.linalg import solve
|
||||
from numpy.random import randint
|
||||
|
||||
from .density_ratio import DensityRatio, KernelInfo
|
||||
from .helpers import guvectorize_compute, np_float, to_ndarray
|
||||
|
||||
|
||||
def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
|
||||
"""
|
||||
Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
|
||||
(Relative Unconstrained Least-Square Importance Fitting)
|
||||
|
||||
p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)
|
||||
|
||||
Arguments:
|
||||
x (numpy.matrix): Sample from p(x).
|
||||
y (numpy.matrix): Sample from q(x).
|
||||
alpha (float): Mixture parameter.
|
||||
sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
|
||||
lambda_range (list<float>): Search range of regularization parameter.
|
||||
kernel_num (int): Number of kernels. (Default 100)
|
||||
verbose (bool): Indicator to print messages (Default True)
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
"""
|
||||
|
||||
# Number of samples.
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
|
||||
# Number of kernel functions.
|
||||
kernel_num = min(kernel_num, nx)
|
||||
|
||||
# Randomly take a subset of x, to identify centers for the kernels.
|
||||
centers = x[randint(nx, size=kernel_num)]
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF starting...")
|
||||
|
||||
if len(sigma_range) == 1 and len(lambda_range) == 1:
|
||||
sigma = sigma_range[0]
|
||||
lambda_ = lambda_range[0]
|
||||
else:
|
||||
if verbose:
|
||||
print("Searching for the optimal sigma and lambda...")
|
||||
|
||||
# Grid-search cross-validation for optimal kernel and regularization parameters.
|
||||
opt_params = search_sigma_and_lambda(
|
||||
x, y, alpha, centers, sigma_range, lambda_range, verbose
|
||||
)
|
||||
sigma = opt_params["sigma"]
|
||||
lambda_ = opt_params["lambda"]
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("Optimizing theta...")
|
||||
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma)
|
||||
H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
|
||||
h = phi_x.mean(axis=0).T
|
||||
theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()
|
||||
|
||||
# No negative coefficients.
|
||||
theta[theta < 0] = 0
|
||||
|
||||
# Compute the alpha-relative density ratio, at the given coordinates.
|
||||
def alpha_density_ratio(coordinates):
|
||||
# Evaluate the kernel at these coordinates, and take the dot-product with the weights.
|
||||
coordinates = to_ndarray(coordinates)
|
||||
phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
|
||||
alpha_density_ratio = phi_x @ theta
|
||||
|
||||
return alpha_density_ratio
|
||||
|
||||
# Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_PE_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# This is Y', in Reference 1.
|
||||
y = to_ndarray(y)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_y = alpha_density_ratio(y)
|
||||
|
||||
# Compute the alpha-relative PE-divergence as given in Reference 1.
|
||||
n = x.shape[0]
|
||||
divergence = (
|
||||
-alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
|
||||
) / n - 1.0 / 2
|
||||
return divergence
|
||||
|
||||
# Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_KL_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# Compute the alpha-relative KL-divergence.
|
||||
n = x.shape[0]
|
||||
divergence = log(g_x).sum(axis=0) / n
|
||||
return divergence
|
||||
|
||||
alpha_PE = alpha_PE_divergence(x, y)
|
||||
alpha_KL = alpha_KL_divergence(x, y)
|
||||
|
||||
if verbose:
|
||||
print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
|
||||
print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))
|
||||
|
||||
kernel_info = KernelInfo(
|
||||
kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
|
||||
)
|
||||
result = DensityRatio(
|
||||
method="RuLSIF",
|
||||
alpha=alpha,
|
||||
theta=theta,
|
||||
lambda_=lambda_,
|
||||
alpha_PE=alpha_PE,
|
||||
alpha_KL=alpha_KL,
|
||||
kernel_info=kernel_info,
|
||||
compute_density_ratio=alpha_density_ratio,
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF completed.")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
|
||||
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
n_min = min(nx, ny)
|
||||
kernel_num = centers.shape[0]
|
||||
|
||||
score_new = inf
|
||||
sigma_new = 0
|
||||
lambda_new = 0
|
||||
|
||||
for sigma in sigma_range:
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma) # (nx, kernel_num)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma) # (ny, kernel_num)
|
||||
H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
|
||||
phi_y.T @ phi_y / ny
|
||||
) # (kernel_num, kernel_num)
|
||||
h = phi_x.mean(axis=0).reshape(-1, 1) # (kernel_num, 1)
|
||||
phi_x = phi_x[:n_min].T # (kernel_num, n_min)
|
||||
phi_y = phi_y[:n_min].T # (kernel_num, n_min)
|
||||
|
||||
for lambda_ in lambda_range:
|
||||
B = H + diag(
|
||||
array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
|
||||
) # (kernel_num, kernel_num)
|
||||
B_inv_X = solve(B, phi_y) # (kernel_num, n_min)
|
||||
X_B_inv_X = multiply(phi_y, B_inv_X) # (kernel_num, n_min)
|
||||
denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X # (n_min, )
|
||||
B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
|
||||
h.T @ B_inv_X / denom
|
||||
) # (kernel_num, n_min)
|
||||
B1 = solve(B, phi_x) + B_inv_X @ diagflat(
|
||||
ones(kernel_num) @ multiply(phi_x, B_inv_X)
|
||||
) # (kernel_num, n_min)
|
||||
B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1)) # (kernel_num, n_min)
|
||||
B2[B2 < 0] = 0
|
||||
r_y = multiply(phi_y, B2).sum(axis=0).T # (n_min, )
|
||||
r_x = multiply(phi_x, B2).sum(axis=0).T # (n_min, )
|
||||
|
||||
# Squared loss of RuLSIF, without regularization term.
|
||||
# Directly related to the negative of the PE-divergence.
|
||||
score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"sigma = %.5f, lambda = %.5f, score = %.5f"
|
||||
% (sigma, lambda_, score)
|
||||
)
|
||||
|
||||
if score < score_new:
|
||||
score_new = score
|
||||
sigma_new = sigma
|
||||
lambda_new = lambda_
|
||||
|
||||
return {"sigma": sigma_new, "lambda": lambda_new}
|
||||
|
||||
|
||||
def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
|
||||
sq_norm = sum(power(x_list - y_row, 2), 1)
|
||||
multiply(neg_gamma, sq_norm, res)
|
||||
exp(res, res)
|
||||
|
||||
|
||||
def _target_numpy_wrapper(x_list, y_list, neg_gamma):
|
||||
res = empty((y_list.shape[0], x_list.shape[0]), np_float)
|
||||
if isinstance(x_list, matrix) or isinstance(y_list, matrix):
|
||||
res = asmatrix(res)
|
||||
|
||||
for j, y_row in enumerate(y_list):
|
||||
# `.T` aligns shapes for matrices, does nothing for 1D ndarray.
|
||||
_compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
_compute_functions = {"numpy": _target_numpy_wrapper}
|
||||
if guvectorize_compute:
|
||||
_compute_functions.update(
|
||||
{
|
||||
key: guvectorize_compute(key)(_compute_kernel_Gaussian)
|
||||
for key in ("cpu", "parallel")
|
||||
}
|
||||
)
|
||||
|
||||
_compute_function = _compute_functions[
|
||||
"cpu" if "cpu" in _compute_functions else "numpy"
|
||||
]
|
||||
|
||||
|
||||
# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
|
||||
def compute_kernel_Gaussian(x_list, y_list, sigma):
|
||||
return _compute_function(x_list, y_list, -0.5 * sigma**-2).T
|
||||
|
||||
|
||||
def set_compute_kernel_target(target: str) -> None:
|
||||
global _compute_function
|
||||
if target not in ("numpy", "cpu", "parallel"):
|
||||
raise ValueError(
|
||||
"'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
|
||||
)
|
||||
|
||||
if target not in _compute_functions:
|
||||
warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
|
||||
target = "numpy"
|
||||
|
||||
_compute_function = _compute_functions[target]
|
||||
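As a sanity reference (not part of the commit), compute_kernel_Gaussian above returns the matrix K[i, j] = exp(-||x_i - y_j||^2 / (2 * sigma^2)), since neg_gamma = -0.5 * sigma**-2 multiplies the squared norms before exponentiation. A naive NumPy equivalent, assuming 2-D float inputs:

import numpy as np

def naive_kernel_gaussian(x_list, y_list, sigma):
    # Pairwise squared distances between rows of x_list (nx, p) and y_list (ny, p).
    diffs = x_list[:, None, :] - y_list[None, :, :]   # (nx, ny, p)
    sq_norms = np.sum(diffs ** 2, axis=-1)            # (nx, ny)
    return np.exp(-sq_norms / (2 * sigma ** 2))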
"""
|
||||
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
|
||||
References:
|
||||
'Change-point detection in time-series data by relative density-ratio estimation'
|
||||
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
|
||||
Neural Networks 43 (2013) 72-83.
|
||||
|
||||
'A Least-squares Approach to Direct Importance Estimation'
|
||||
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
|
||||
Journal of Machine Learning Research 10 (2009) 1391-1445.
|
||||
"""
|
||||
|
||||
from warnings import warn
|
||||
|
||||
from numpy import (
|
||||
array,
|
||||
asarray,
|
||||
asmatrix,
|
||||
diag,
|
||||
diagflat,
|
||||
empty,
|
||||
exp,
|
||||
inf,
|
||||
log,
|
||||
matrix,
|
||||
multiply,
|
||||
ones,
|
||||
power,
|
||||
sum,
|
||||
)
|
||||
from numpy.linalg import solve
|
||||
from numpy.random import randint
|
||||
|
||||
from .density_ratio import DensityRatio, KernelInfo
|
||||
from .helpers import guvectorize_compute, np_float, to_ndarray
|
||||
|
||||
|
||||
def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
|
||||
"""
|
||||
Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
|
||||
(Relative Unconstrained Least-Square Importance Fitting)
|
||||
|
||||
p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)
|
||||
|
||||
Arguments:
|
||||
x (numpy.matrix): Sample from p(x).
|
||||
y (numpy.matrix): Sample from q(x).
|
||||
alpha (float): Mixture parameter.
|
||||
sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
|
||||
lambda_range (list<float>): Search range of regularization parameter.
|
||||
kernel_num (int): Number of kernels. (Default 100)
|
||||
verbose (bool): Indicator to print messages (Default True)
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
"""
|
||||
|
||||
# Number of samples.
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
|
||||
# Number of kernel functions.
|
||||
kernel_num = min(kernel_num, nx)
|
||||
|
||||
# Randomly take a subset of x, to identify centers for the kernels.
|
||||
centers = x[randint(nx, size=kernel_num)]
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF starting...")
|
||||
|
||||
if len(sigma_range) == 1 and len(lambda_range) == 1:
|
||||
sigma = sigma_range[0]
|
||||
lambda_ = lambda_range[0]
|
||||
else:
|
||||
if verbose:
|
||||
print("Searching for the optimal sigma and lambda...")
|
||||
|
||||
# Grid-search cross-validation for optimal kernel and regularization parameters.
|
||||
opt_params = search_sigma_and_lambda(
|
||||
x, y, alpha, centers, sigma_range, lambda_range, verbose
|
||||
)
|
||||
sigma = opt_params["sigma"]
|
||||
lambda_ = opt_params["lambda"]
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("Optimizing theta...")
|
||||
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma)
|
||||
H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
|
||||
h = phi_x.mean(axis=0).T
|
||||
theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()
|
||||
|
||||
# No negative coefficients.
|
||||
theta[theta < 0] = 0
|
||||
|
||||
# Compute the alpha-relative density ratio, at the given coordinates.
|
||||
def alpha_density_ratio(coordinates):
|
||||
# Evaluate the kernel at these coordinates, and take the dot-product with the weights.
|
||||
coordinates = to_ndarray(coordinates)
|
||||
phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
|
||||
alpha_density_ratio = phi_x @ theta
|
||||
|
||||
return alpha_density_ratio
|
||||
|
||||
# Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_PE_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# This is Y', in Reference 1.
|
||||
y = to_ndarray(y)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_y = alpha_density_ratio(y)
|
||||
|
||||
# Compute the alpha-relative PE-divergence as given in Reference 1.
|
||||
n = x.shape[0]
|
||||
divergence = (
|
||||
-alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
|
||||
) / n - 1.0 / 2
|
||||
return divergence
|
||||
|
||||
# Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
|
||||
def alpha_KL_divergence(x, y):
|
||||
# This is Y, in Reference 1.
|
||||
x = to_ndarray(x)
|
||||
|
||||
# Obtain alpha-relative density ratio at these points.
|
||||
g_x = alpha_density_ratio(x)
|
||||
|
||||
# Compute the alpha-relative KL-divergence.
|
||||
n = x.shape[0]
|
||||
divergence = log(g_x).sum(axis=0) / n
|
||||
return divergence
|
||||
|
||||
alpha_PE = alpha_PE_divergence(x, y)
|
||||
alpha_KL = alpha_KL_divergence(x, y)
|
||||
|
||||
if verbose:
|
||||
print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
|
||||
print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))
|
||||
|
||||
kernel_info = KernelInfo(
|
||||
kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
|
||||
)
|
||||
result = DensityRatio(
|
||||
method="RuLSIF",
|
||||
alpha=alpha,
|
||||
theta=theta,
|
||||
lambda_=lambda_,
|
||||
alpha_PE=alpha_PE,
|
||||
alpha_KL=alpha_KL,
|
||||
kernel_info=kernel_info,
|
||||
compute_density_ratio=alpha_density_ratio,
|
||||
)
|
||||
|
||||
if verbose:
|
||||
print("RuLSIF completed.")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
|
||||
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
|
||||
nx = x.shape[0]
|
||||
ny = y.shape[0]
|
||||
n_min = min(nx, ny)
|
||||
kernel_num = centers.shape[0]
|
||||
|
||||
score_new = inf
|
||||
sigma_new = 0
|
||||
lambda_new = 0
|
||||
|
||||
for sigma in sigma_range:
|
||||
phi_x = compute_kernel_Gaussian(x, centers, sigma) # (nx, kernel_num)
|
||||
phi_y = compute_kernel_Gaussian(y, centers, sigma) # (ny, kernel_num)
|
||||
H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
|
||||
phi_y.T @ phi_y / ny
|
||||
) # (kernel_num, kernel_num)
|
||||
h = phi_x.mean(axis=0).reshape(-1, 1) # (kernel_num, 1)
|
||||
phi_x = phi_x[:n_min].T # (kernel_num, n_min)
|
||||
phi_y = phi_y[:n_min].T # (kernel_num, n_min)
|
||||
|
||||
for lambda_ in lambda_range:
|
||||
B = H + diag(
|
||||
array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
|
||||
) # (kernel_num, kernel_num)
|
||||
B_inv_X = solve(B, phi_y) # (kernel_num, n_min)
|
||||
X_B_inv_X = multiply(phi_y, B_inv_X) # (kernel_num, n_min)
|
||||
denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X # (n_min, )
|
||||
B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
|
||||
h.T @ B_inv_X / denom
|
||||
) # (kernel_num, n_min)
|
||||
B1 = solve(B, phi_x) + B_inv_X @ diagflat(
|
||||
ones(kernel_num) @ multiply(phi_x, B_inv_X)
|
||||
) # (kernel_num, n_min)
|
||||
B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1)) # (kernel_num, n_min)
|
||||
B2[B2 < 0] = 0
|
||||
r_y = multiply(phi_y, B2).sum(axis=0).T # (n_min, )
|
||||
r_x = multiply(phi_x, B2).sum(axis=0).T # (n_min, )
|
||||
|
||||
# Squared loss of RuLSIF, without regularization term.
|
||||
# Directly related to the negative of the PE-divergence.
|
||||
score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"sigma = %.5f, lambda = %.5f, score = %.5f"
|
||||
% (sigma, lambda_, score)
|
||||
)
|
||||
|
||||
if score < score_new:
|
||||
score_new = score
|
||||
sigma_new = sigma
|
||||
lambda_new = lambda_
|
||||
|
||||
return {"sigma": sigma_new, "lambda": lambda_new}
|
||||
|
||||
|
||||
def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
|
||||
sq_norm = sum(power(x_list - y_row, 2), 1)
|
||||
multiply(neg_gamma, sq_norm, res)
|
||||
exp(res, res)
|
||||
|
||||
|
||||
def _target_numpy_wrapper(x_list, y_list, neg_gamma):
|
||||
res = empty((y_list.shape[0], x_list.shape[0]), np_float)
|
||||
if isinstance(x_list, matrix) or isinstance(y_list, matrix):
|
||||
res = asmatrix(res)
|
||||
|
||||
for j, y_row in enumerate(y_list):
|
||||
# `.T` aligns shapes for matrices, does nothing for 1D ndarray.
|
||||
_compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
_compute_functions = {"numpy": _target_numpy_wrapper}
|
||||
if guvectorize_compute:
|
||||
_compute_functions.update(
|
||||
{
|
||||
key: guvectorize_compute(key)(_compute_kernel_Gaussian)
|
||||
for key in ("cpu", "parallel")
|
||||
}
|
||||
)
|
||||
|
||||
_compute_function = _compute_functions[
|
||||
"cpu" if "cpu" in _compute_functions else "numpy"
|
||||
]
|
||||
|
||||
|
||||
# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
|
||||
def compute_kernel_Gaussian(x_list, y_list, sigma):
|
||||
return _compute_function(x_list, y_list, -0.5 * sigma**-2).T
|
||||
|
||||
|
||||
def set_compute_kernel_target(target: str) -> None:
|
||||
global _compute_function
|
||||
if target not in ("numpy", "cpu", "parallel"):
|
||||
raise ValueError(
|
||||
"'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
|
||||
)
|
||||
|
||||
if target not in _compute_functions:
|
||||
warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
|
||||
target = "numpy"
|
||||
|
||||
_compute_function = _compute_functions[target]
|
||||
|
|
|
@@ -1,7 +1,7 @@
from warnings import filterwarnings

from .core import densratio
from .RuLSIF import set_compute_kernel_target

filterwarnings("default", message="'numba'", category=ImportWarning, module="densratio")
__all__ = ["densratio", "set_compute_kernel_target"]
@@ -1,70 +1,70 @@
"""
|
||||
densratio.core
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Estimate Density Ratio p(x)/q(y)
|
||||
"""
|
||||
|
||||
from numpy import linspace
|
||||
|
||||
from .helpers import to_ndarray
|
||||
from .RuLSIF import RuLSIF
|
||||
|
||||
|
||||
def densratio(
|
||||
x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
|
||||
):
|
||||
"""Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))
|
||||
|
||||
Arguments:
|
||||
x: sample from p(x).
|
||||
y: sample from q(x).
|
||||
alpha: Default 0 - corresponds to ordinary density ratio.
|
||||
sigma_range: search range of Gaussian kernel bandwidth.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
lambda_range: search range of regularization parameter for uLSIF.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
kernel_num: number of kernels. Default 100.
|
||||
verbose: indicator to print messages. Default True.
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
|
||||
Raises:
|
||||
ValueError: if dimension of x != dimension of y
|
||||
|
||||
Usage::
|
||||
>>> from scipy.stats import norm
|
||||
>>> from densratio import densratio
|
||||
|
||||
>>> x = norm.rvs(size=200, loc=1, scale=1./8)
|
||||
>>> y = norm.rvs(size=200, loc=1, scale=1./2)
|
||||
>>> result = densratio(x, y, alpha=0.7)
|
||||
>>> print(result)
|
||||
|
||||
>>> density_ratio = result.compute_density_ratio(y)
|
||||
>>> print(density_ratio)
|
||||
"""
|
||||
|
||||
x = to_ndarray(x)
|
||||
y = to_ndarray(y)
|
||||
|
||||
if x.shape[1] != y.shape[1]:
|
||||
raise ValueError("x and y must be same dimensions.")
|
||||
|
||||
if isinstance(sigma_range, str) and sigma_range != "auto":
|
||||
raise TypeError("Invalid value for sigma_range.")
|
||||
|
||||
if isinstance(lambda_range, str) and lambda_range != "auto":
|
||||
raise TypeError("Invalid value for lambda_range.")
|
||||
|
||||
if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
|
||||
sigma_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
if lambda_range is None or (
|
||||
isinstance(lambda_range, str) and lambda_range == "auto"
|
||||
):
|
||||
lambda_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
|
||||
return result
|
||||
"""
|
||||
densratio.core
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Estimate Density Ratio p(x)/q(y)
|
||||
"""
|
||||
|
||||
from numpy import linspace
|
||||
|
||||
from .helpers import to_ndarray
|
||||
from .RuLSIF import RuLSIF
|
||||
|
||||
|
||||
def densratio(
|
||||
x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
|
||||
):
|
||||
"""Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))
|
||||
|
||||
Arguments:
|
||||
x: sample from p(x).
|
||||
y: sample from q(x).
|
||||
alpha: Default 0 - corresponds to ordinary density ratio.
|
||||
sigma_range: search range of Gaussian kernel bandwidth.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
lambda_range: search range of regularization parameter for uLSIF.
|
||||
Default "auto" means 10^-3, 10^-2, ..., 10^9.
|
||||
kernel_num: number of kernels. Default 100.
|
||||
verbose: indicator to print messages. Default True.
|
||||
|
||||
Returns:
|
||||
densratio.DensityRatio object which has `compute_density_ratio()`.
|
||||
|
||||
Raises:
|
||||
ValueError: if dimension of x != dimension of y
|
||||
|
||||
Usage::
|
||||
>>> from scipy.stats import norm
|
||||
>>> from densratio import densratio
|
||||
|
||||
>>> x = norm.rvs(size=200, loc=1, scale=1./8)
|
||||
>>> y = norm.rvs(size=200, loc=1, scale=1./2)
|
||||
>>> result = densratio(x, y, alpha=0.7)
|
||||
>>> print(result)
|
||||
|
||||
>>> density_ratio = result.compute_density_ratio(y)
|
||||
>>> print(density_ratio)
|
||||
"""
|
||||
|
||||
x = to_ndarray(x)
|
||||
y = to_ndarray(y)
|
||||
|
||||
if x.shape[1] != y.shape[1]:
|
||||
raise ValueError("x and y must be same dimensions.")
|
||||
|
||||
if isinstance(sigma_range, str) and sigma_range != "auto":
|
||||
raise TypeError("Invalid value for sigma_range.")
|
||||
|
||||
if isinstance(lambda_range, str) and lambda_range != "auto":
|
||||
raise TypeError("Invalid value for lambda_range.")
|
||||
|
||||
if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
|
||||
sigma_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
if lambda_range is None or (
|
||||
isinstance(lambda_range, str) and lambda_range == "auto"
|
||||
):
|
||||
lambda_range = 10 ** linspace(-3, 9, 13)
|
||||
|
||||
result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
|
||||
return result
|
||||
|
|
|
@@ -1,88 +1,88 @@
from pprint import pformat
from re import sub


class DensityRatio:
    """Density Ratio."""

    def __init__(
        self,
        method,
        alpha,
        theta,
        lambda_,
        alpha_PE,
        alpha_KL,
        kernel_info,
        compute_density_ratio,
    ):
        self.method = method
        self.alpha = alpha
        self.theta = theta
        self.lambda_ = lambda_
        self.alpha_PE = alpha_PE
        self.alpha_KL = alpha_KL
        self.kernel_info = kernel_info
        self.compute_density_ratio = compute_density_ratio

    def __str__(self):
        return """
Method: %(method)s

Alpha: %(alpha)s

Kernel Information:
%(kernel_info)s

Kernel Weights (theta):
%(theta)s

Regularization Parameter (lambda): %(lambda_)s

Alpha-Relative PE-Divergence: %(alpha_PE)s

Alpha-Relative KL-Divergence: %(alpha_KL)s

Function to Estimate Density Ratio:
compute_density_ratio(x)

"""[
            1:-1
        ] % dict(
            method=self.method,
            kernel_info=self.kernel_info,
            alpha=self.alpha,
            theta=my_format(self.theta),
            lambda_=self.lambda_,
            alpha_PE=self.alpha_PE,
            alpha_KL=self.alpha_KL,
        )


class KernelInfo:
    """Kernel Information."""

    def __init__(self, kernel_type, kernel_num, sigma, centers):
        self.kernel_type = kernel_type
        self.kernel_num = kernel_num
        self.sigma = sigma
        self.centers = centers

    def __str__(self):
        return """
Kernel type: %(kernel_type)s
Number of kernels: %(kernel_num)s
Bandwidth(sigma): %(sigma)s
Centers: %(centers)s
"""[
            1:-1
        ] % dict(
            kernel_type=self.kernel_type,
            kernel_num=self.kernel_num,
            sigma=self.sigma,
            centers=my_format(self.centers),
        )


def my_format(str):
    return sub(r"\s+", " ", (pformat(str).split("\n")[0] + ".."))
@@ -1,36 +1,36 @@
from numpy import array, ndarray, result_type

np_float = result_type(float)
try:
    import numba as nb
except ModuleNotFoundError:
    guvectorize_compute = None
else:
    _nb_float = nb.from_dtype(np_float)

    def guvectorize_compute(target: str, *, cache: bool = True):
        return nb.guvectorize(
            [nb.void(_nb_float[:, :], _nb_float[:], _nb_float, _nb_float[:])],
            "(m, p),(p),()->(m)",
            nopython=True,
            target=target,
            cache=cache,
        )


def is_numeric(x):
    return isinstance(x, int) or isinstance(x, float)


def to_ndarray(x):
    if isinstance(x, ndarray):
        if len(x.shape) == 1:
            return x.reshape(-1, 1)
        else:
            return x
    elif str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
        return x.values
    elif not x:
        raise ValueError("Cannot transform to numpy.matrix.")
    else:
        return to_ndarray(array(x))
@@ -1,4 +1,4 @@
import numpy as np


def get_doc(probs1, probs2):
    return np.mean(probs2) - np.mean(probs1)
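# Illustrative example (synthetic numbers, not from the original file):
# get_doc is the difference between the mean confidences of two samples.
#
#   probs_val = np.array([0.90, 0.80, 0.95])
#   probs_te = np.array([0.70, 0.60, 0.80])
#   get_doc(probs_val, probs_te)  # -> 0.70 - 0.8833... = -0.1833...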
@@ -1,66 +1,66 @@
import numpy as np
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

from baselines import densratio
from baselines.pykliep import DensityRatioEstimator


def kliep(Xtr, ytr, Xte):
    kliep = DensityRatioEstimator()
    kliep.fit(Xtr, Xte)
    return kliep.predict(Xtr)


def usilf(Xtr, ytr, Xte, alpha=0.0):
    dense_ratio_obj = densratio(Xtr, Xte, alpha=alpha, verbose=False)
    return dense_ratio_obj.compute_density_ratio(Xtr)


def logreg(Xtr, ytr, Xte):
    # check "Direct Density Ratio Estimation for
    # Large-scale Covariate Shift Adaptation", Eq.28

    if issparse(Xtr):
        X = vstack([Xtr, Xte])
    else:
        X = np.concatenate([Xtr, Xte])

    y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

    logreg = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
        n_jobs=-1,
    )
    logreg.fit(X, y)
    probs = logreg.predict_proba(Xtr)
    prob_train, prob_test = probs[:, 0], probs[:, 1]
    prior_train = Xtr.shape[0]
    prior_test = Xte.shape[0]
    w = (prior_train / prior_test) * (prob_test / prob_train)
    return w


kdex2_params = {"bandwidth": np.logspace(-1, 1, 20)}


def kdex2_lltr(Xtr):
    if issparse(Xtr):
        Xtr = Xtr.toarray()
    return GridSearchCV(KernelDensity(), kdex2_params).fit(Xtr).score_samples(Xtr)


def kdex2_weights(Xtr, Xte, log_likelihood_tr):
    log_likelihood_te = (
        GridSearchCV(KernelDensity(), kdex2_params).fit(Xte).score_samples(Xtr)
    )
    likelihood_tr = np.exp(log_likelihood_tr)
    likelihood_te = np.exp(log_likelihood_te)
    return likelihood_te / likelihood_tr


def get_acc(tr_preds, ytr, w):
    return np.sum((1.0 * (tr_preds == ytr)) * w) / np.sum(w)
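# A minimal sketch (hypothetical classifier `clf` and arrays Xtr/ytr/Xte) of the
# intended pipeline: estimate importance weights for the training points, then
# reweight their 0/1 correctness with get_acc to approximate test accuracy.
#
#   w = logreg(Xtr, ytr, Xte)             # or kliep(...) / usilf(...)
#   estim_acc = get_acc(clf.predict(Xtr), ytr, w)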
@@ -1,140 +1,140 @@
# import itertools
# from typing import Iterable

# import quapy as qp
# import quapy.functional as F
# from densratio import densratio
# from quapy.method.aggregative import *
# from quapy.protocol import (
#     AbstractStochasticSeededProtocol,
#     OnLabelledCollectionProtocol,
# )
# from scipy.sparse import issparse, vstack
# from scipy.spatial.distance import cdist
# from scipy.stats import multivariate_normal
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import GridSearchCV
# from sklearn.neighbors import KernelDensity

import time

import numpy as np
import sklearn.metrics as metrics
from pykliep import DensityRatioEstimator
from quapy.protocol import APP
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import baselines.impweight as iw
from baselines.densratio import densratio
from quacc.dataset import Dataset


# ---------------------------------------------------------------------------------------
# Methods of "importance weight", e.g., by ratio density estimation (KLIEP, SILF, LogReg)
# ---------------------------------------------------------------------------------------
class ImportanceWeight:
    def weights(self, Xtr, ytr, Xte):
        ...


class KLIEP(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        kliep = DensityRatioEstimator()
        kliep.fit(Xtr, Xte)
        return kliep.predict(Xtr)


class USILF(ImportanceWeight):
    def __init__(self, alpha=0.0):
        self.alpha = alpha

    def weights(self, Xtr, ytr, Xte):
        dense_ratio_obj = densratio(Xtr, Xte, alpha=self.alpha, verbose=False)
        return dense_ratio_obj.compute_density_ratio(Xtr)


class LogReg(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        # check "Direct Density Ratio Estimation for
        # Large-scale Covariate Shift Adaptation", Eq.28

        if issparse(Xtr):
            X = vstack([Xtr, Xte])
        else:
            X = np.concatenate([Xtr, Xte])

        y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

        logreg = GridSearchCV(
            LogisticRegression(),
            param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
            n_jobs=-1,
        )
        logreg.fit(X, y)
        probs = logreg.predict_proba(Xtr)
        prob_train, prob_test = probs[:, 0], probs[:, 1]
        prior_train = Xtr.shape[0]
        prior_test = Xte.shape[0]
        w = (prior_train / prior_test) * (prob_test / prob_train)
        return w


class KDEx2(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        params = {"bandwidth": np.logspace(-1, 1, 20)}
        log_likelihood_tr = (
            GridSearchCV(KernelDensity(), params).fit(Xtr).score_samples(Xtr)
        )
        log_likelihood_te = (
            GridSearchCV(KernelDensity(), params).fit(Xte).score_samples(Xtr)
        )
        likelihood_tr = np.exp(log_likelihood_tr)
        likelihood_te = np.exp(log_likelihood_te)
        return likelihood_te / likelihood_tr


if __name__ == "__main__":
    # d = Dataset("rcv1", target="CCAT").get_raw()
    d = Dataset("imdb", n_prevalences=1).get()[0]

    tstart = time.time()
    lr = LogisticRegression()
    lr.fit(*d.train.Xy)
    val_preds = lr.predict(d.validation.X)
    protocol = APP(
        d.test,
        n_prevalences=21,
        repeats=1,
        sample_size=100,
        return_type="labelled_collection",
    )

    results = []
    for sample in protocol():
        wx = iw.kliep(d.validation.X, d.validation.y, sample.X)
        test_preds = lr.predict(sample.X)
        estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx)
        true_acc = metrics.accuracy_score(sample.y, test_preds)
        results.append((sample.prevalence(), estim_acc, true_acc))

    tend = time.time()

    for r in results:
        print(*r)

    print(f"logreg finished [took {tend-tstart:.3f}s]")
    import win11toast

    win11toast.notify("models.py", "Completed")
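# The class-based wrappers above mirror the functions in baselines.impweight;
# a sketch of the equivalent call for the loop body (same data objects as in
# __main__ above):
#
#   wx = KLIEP().weights(d.validation.X, d.validation.y, sample.X)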
@@ -1,221 +1,221 @@
import warnings

import numpy as np
from scipy.sparse import csr_matrix


class DensityRatioEstimator:
    """
    Class to accomplish direct density estimation implementing the original KLIEP
    algorithm from Direct Importance Estimation with Model Selection
    and Its Application to Covariate Shift Adaptation by Sugiyama et al.

    The training set is distributed via
    train ~ p(x)
    and the test set is distributed via
    test ~ q(x).

    The KLIEP algorithm and its variants approximate w(x) = q(x) / p(x) directly. The predict function returns the
    estimate of w(x). The function w(x) can serve as sample weights for the training set during
    training to modify the expectation function that the model's loss function is optimized via,
    i.e.

    E_{x ~ w(x)p(x)} loss(x) = E_{x ~ q(x)} loss(x).

    Usage:
        The fit method runs the KLIEP algorithm using LCV and returns the value of J
        trained on the entire training/test set with the best sigma found.
        Use the predict method on the training set to determine the sample weights from the KLIEP algorithm.
    """

    def __init__(
        self,
        max_iter=5000,
        num_params=[0.1, 0.2],
        epsilon=1e-4,
        cv=3,
        sigmas=[0.01, 0.1, 0.25, 0.5, 0.75, 1],
        random_state=None,
        verbose=0,
    ):
        """
        Direct density estimation using an inner LCV loop to select the proper model;
        can be combined with sklearn cross-validation utilities, with or without
        storing the inner CV, or used as a standard grid search.

        max_iter : Number of iterations to perform.
        num_params : List of fractions of test-set vectors used to construct the
            approximation in the inner LCV loop. Each entry must be a float; the
            original paper used 10%, i.e. 0.1.
        sigmas : List of sigmas to be used in the inner LCV loop.
        epsilon : Additive factor in the iterative algorithm for numerical stability.
        cv : Number of folds (R in the original paper) for the LCV loop.
        """
        self.max_iter = max_iter
        self.num_params = num_params
        self.epsilon = epsilon
        self.verbose = verbose
        self.sigmas = sigmas
        self.cv = cv
        # Store the requested seed (hardcoding 0 here would silently ignore the
        # random_state argument).
        self.random_state = random_state

    def fit(self, X_train, X_test, alpha_0=None):
        """Uses cross validation to select sigma as in the original paper (LCV).
        In a break from sklearn convention, y=X_test.
        The parameter cv corresponds to R in the original paper.
        Once found, the best sigma is used to train on the full set."""

        # LCV loop, shuffle a copy in place for performance.
        cv = self.cv
        chunk = int(X_test.shape[0] / float(cv))
        if self.random_state is not None:
            np.random.seed(self.random_state)
        # if isinstance(X_test, csr_matrix):
        #     X_test_shuffled = X_test.toarray()
        # else:
        #     X_test_shuffled = X_test.copy()
        X_test_shuffled = X_test.copy()

        X_test_index = np.arange(X_test_shuffled.shape[0])
        np.random.shuffle(X_test_index)
        X_test_shuffled = X_test_shuffled[X_test_index, :]

        j_scores = {}

        if type(self.sigmas) != list:
            self.sigmas = [self.sigmas]

        if type(self.num_params) != list:
            self.num_params = [self.num_params]

        if len(self.sigmas) * len(self.num_params) > 1:
            # Inner LCV loop
            for num_param in self.num_params:
                for sigma in self.sigmas:
                    j_scores[(num_param, sigma)] = np.zeros(cv)
                    for k in range(1, cv + 1):
                        if self.verbose > 0:
                            print("Training: sigma: %s R: %s" % (sigma, k))
                        X_test_fold = X_test_shuffled[(k - 1) * chunk : k * chunk, :]
                        j_scores[(num_param, sigma)][k - 1] = self._fit(
                            X_train=X_train,
                            X_test=X_test_fold,
                            num_parameters=num_param,
                            sigma=sigma,
                        )
                    j_scores[(num_param, sigma)] = np.mean(j_scores[(num_param, sigma)])

            sorted_scores = sorted(
                [x for x in j_scores.items() if np.isfinite(x[1])],
                key=lambda x: x[1],
                reverse=True,
            )
            if len(sorted_scores) == 0:
                warnings.warn("LCV failed to converge for all values of sigma.")
                return self
            self._sigma = sorted_scores[0][0][1]
            self._num_parameters = sorted_scores[0][0][0]
            self._j_scores = sorted_scores
        else:
            self._sigma = self.sigmas[0]
            self._num_parameters = self.num_params[0]
        # best sigma
        self._j = self._fit(
            X_train=X_train,
            X_test=X_test_shuffled,
            num_parameters=self._num_parameters,
            sigma=self._sigma,
        )

        return self  # Compatibility with sklearn

    def _fit(self, X_train, X_test, num_parameters, sigma, alpha_0=None):
        """Fits the estimator with the given parameters w-hat and returns J"""

        if type(num_parameters) == float:
            num_parameters = int(X_test.shape[0] * num_parameters)

        self._select_param_vectors(
            X_test=X_test, sigma=sigma, num_parameters=num_parameters
        )

        # if isinstance(X_train, csr_matrix):
        #     X_train = X_train.toarray()
        X_train = self._reshape_X(X_train)
        X_test = self._reshape_X(X_test)

        if alpha_0 is None:
            alpha_0 = np.ones(shape=(num_parameters, 1)) / float(num_parameters)

        self._find_alpha(
            X_train=X_train,
            X_test=X_test,
            num_parameters=num_parameters,
            epsilon=self.epsilon,
            alpha_0=alpha_0,
            sigma=sigma,
        )

        return self._calculate_j(X_test, sigma=sigma)

    def _calculate_j(self, X_test, sigma):
        pred = self.predict(X_test, sigma=sigma) + 0.0000001
        log = np.log(pred).sum()
        return log / (X_test.shape[0])

    def score(self, X_test):
        """Return the J score, similar to sklearn's API"""
        return self._calculate_j(X_test=X_test, sigma=self._sigma)

    @staticmethod
    def _reshape_X(X):
        """Reshape input from mxn to mx1xn to take advantage of numpy broadcasting."""
        if len(X.shape) != 3:
            return X.reshape((X.shape[0], 1, X.shape[1]))
        return X

    def _select_param_vectors(self, X_test, sigma, num_parameters):
        """X_test is the test set. b is the number of parameters."""
        indices = np.random.choice(X_test.shape[0], size=num_parameters, replace=False)
        self._test_vectors = X_test[indices, :].copy()
        self._phi_fitted = True

    def _phi(self, X, sigma=None):
        if sigma is None:
            sigma = self._sigma

        if self._phi_fitted:
            return np.exp(
                -np.sum((X - self._test_vectors) ** 2, axis=-1) / (2 * sigma**2)
            )
        raise Exception("Phi not fitted.")

    def _find_alpha(self, alpha_0, X_train, X_test, num_parameters, sigma, epsilon):
        # A holds the kernel design matrix on the test set; b is the mean kernel
        # response on the training set, used for the normalization constraint.
        A = self._phi(X_test, sigma)
        b = self._phi(X_train, sigma).sum(axis=0) / X_train.shape[0]
        b = b.reshape((num_parameters, 1))

        out = alpha_0.copy()
        for k in range(self.max_iter):
            mat = np.dot(A, out)
            mat += 0.000000001
            out += epsilon * np.dot(np.transpose(A), 1.0 / mat)
            out += b * (
                ((1 - np.dot(np.transpose(b), out)) / np.dot(np.transpose(b), b))
            )
            out = np.maximum(0, out)
            out /= np.dot(np.transpose(b), out)

        self._alpha = out
        self._fitted = True

    def predict(self, X, sigma=None):
        """Equivalent of w(X) from the original paper."""

        X = self._reshape_X(X)
        if not self._fitted:
            raise Exception("Not fitted!")
        return np.dot(self._phi(X, sigma=sigma), self._alpha).reshape((X.shape[0],))
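# A minimal usage sketch (synthetic Gaussian data, illustrative only): fit on
# train/test features, then read off w(x) = q(x)/p(x) at the training points.
#
#   rng = np.random.default_rng(0)
#   X_tr = rng.normal(0.0, 1.0, size=(500, 2))
#   X_te = rng.normal(0.5, 1.0, size=(500, 2))
#   kliep = DensityRatioEstimator(sigmas=[0.5], num_params=[0.1])
#   kliep.fit(X_tr, X_te)      # single (sigma, num_params) pair: no inner LCV
#   w = kliep.predict(X_tr)    # larger w where the test density is higher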
@@ -1,14 +1,14 @@
import numpy as np
from sklearn import clone
from sklearn.base import BaseEstimator


def clone_fit(c_model: BaseEstimator, data, labels):
    c_model2 = clone(c_model)
    c_model2.fit(data, labels)
    return c_model2


def get_score(pred1, pred2, labels):
    return np.mean((pred1 == labels).astype(int) - (pred2 == labels).astype(int))
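# Example (illustrative arrays): get_score is the mean accuracy gap between two
# prediction vectors; a positive value means pred1 matches the labels more often.
#
#   labels = np.array([0, 1, 1, 0])
#   get_score(np.array([0, 1, 1, 1]), np.array([0, 0, 0, 1]), labels)  # -> 0.5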
conf.yaml (464 lines)
@@ -1,233 +1,233 @@
debug_conf: &debug_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 5
    DATASET_PREVS:
      # - 0.2
      - 0.5
      # - 0.8

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT

  plot_confs:
    debug:
      PLOT_ESTIMATORS:
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true

mc_conf: &mc_conf
  global:
    METRICS:
      - acc
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    debug3:
      PLOT_ESTIMATORS:
        - binmc_sld
        - mulmc_sld
        - binne_sld
        - mulne_sld
        - bin_sld_gs
        - mul_sld_gs
        - atc_mc
      PLOT_STDEV: true

test_conf: &test_conf
  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    # - DATASET_NAME: imdb

  plot_confs:
    gs_vs_gsq:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - bin_sld_gsq
        - mul_sld
        - mul_sld_gs
        - mul_sld_gsq
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - atc_mc
        - atc_ne
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_gs
        - mul_sld
        - mul_sld_gs
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne
    pacc_vs_atc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - bin_pacc_gs
        - mul_pacc
        - mul_pacc_gs
        - atc_mc
        - atc_ne

main_conf: &main_conf

  global:
    METRICS:
      - acc
      - f1
    DATASET_N_PREVS: 9
    DATASET_DIR_UPDATE: true

  confs:
    - DATASET_NAME: rcv1
      DATASET_TARGET: CCAT
    - DATASET_NAME: imdb
  confs_next:
    - DATASET_NAME: rcv1
      DATASET_TARGET: GCAT
    - DATASET_NAME: rcv1
      DATASET_TARGET: MCAT

  plot_confs:
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
  plot_confs_completed:
    max_conf_vs_atc_pacc:
      PLOT_ESTIMATORS:
        - bin_pacc
        - binmc_pacc
        - mul_pacc
        - mulmc_pacc
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy_pacc:
      PLOT_ESTIMATORS:
        - binmc_pacc
        - binne_pacc
        - mulmc_pacc
        - mulne_pacc
        - atc_mc
      PLOT_STDEV: true
    gs_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    gs_vs_all:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_pacc_gs
        - bin_pacc_gs
        - atc_mc
        - doc_feat
        - kfcv
      PLOT_STDEV: true
    gs_vs_qgs:
      PLOT_ESTIMATORS:
        - mul_sld_gs
        - bin_sld_gs
        - mul_sld_gsq
        - bin_sld_gsq
        - atc_mc
        - atc_ne
      PLOT_STDEV: true
    cc_vs_other:
      PLOT_ESTIMATORS:
        - mul_cc
        - bin_cc
        - mul_sld
        - bin_sld
        - mul_pacc
        - bin_pacc
      PLOT_STDEV: true
    max_conf_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - binmc_sld
        - mul_sld
        - mulmc_sld
        - atc_mc
      PLOT_STDEV: true
    max_conf_vs_entropy:
      PLOT_ESTIMATORS:
        - binmc_sld
        - binne_sld
        - mulmc_sld
        - mulne_sld
        - atc_mc
      PLOT_STDEV: true
    sld_vs_pacc:
      PLOT_ESTIMATORS:
        - bin_sld
        - mul_sld
        - bin_pacc
        - mul_pacc
        - atc_mc
      PLOT_STDEV: true
  plot_confs_other:
    best_vs_atc:
      PLOT_ESTIMATORS:
        - mul_sld_bcts
        - mul_sld_gs
        - bin_sld_bcts
        - bin_sld_gs
        - atc_mc
        - atc_ne
    all_vs_atc:
      PLOT_ESTIMATORS:
        - bin_sld
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld
        - mul_sld_bcts
        - mul_sld_gs
        - atc_mc
        - atc_ne
    best_vs_all:
      PLOT_ESTIMATORS:
        - bin_sld_bcts
        - bin_sld_gs
        - mul_sld_bcts
        - mul_sld_gs
        - kfcv
        - atc_mc
        - atc_ne
        - doc_feat

exec: *main_conf
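# The `exec` key at the bottom selects which configuration block to run via a
# YAML alias: `&main_conf` defines the anchor and `*main_conf` dereferences it,
# so switching setups only requires editing that one line, e.g. `exec: *debug_conf`.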
out_imdb.md (890 lines)
@@ -1,445 +1,445 @@
target: default
train: [0.5 0.5]
validation: [0.5 0.5]
evaluate_binary: 277.300s
evaluate_multiclass: 139.986s
kfcv: 98.625s
atc_mc: 93.304s
atc_ne: 91.201s
doc_feat: 29.930s
rca_score: 1018.341s
rca_star_score: 1013.733s
tot: 1054.413s

| prev         | bin    | mul    | kfcv   | atc_mc | atc_ne | doc_feat | rca    | rca_star |
|--------------|--------|--------|--------|--------|--------|----------|--------|----------|
| (0.0, 1.0)   | 0.0154 | 0.0177 | 0.0249 | 0.0291 | 0.0291 | 0.0248   | 0.2705 | 0.2413   |
| (0.05, 0.95) | 0.0309 | 0.0284 | 0.0252 | 0.0300 | 0.0300 | 0.0247   | 0.2796 | 0.2504   |
| (0.1, 0.9)   | 0.0309 | 0.0302 | 0.0251 | 0.0279 | 0.0279 | 0.0250   | 0.2722 | 0.2430   |
| (0.15, 0.85) | 0.0310 | 0.0339 | 0.0245 | 0.0269 | 0.0269 | 0.0244   | 0.2684 | 0.2392   |
| (0.2, 0.8)   | 0.0411 | 0.0407 | 0.0259 | 0.0292 | 0.0292 | 0.0257   | 0.2724 | 0.2432   |
| (0.25, 0.75) | 0.0381 | 0.0376 | 0.0262 | 0.0319 | 0.0319 | 0.0259   | 0.2701 | 0.2409   |
| (0.3, 0.7)   | 0.0442 | 0.0452 | 0.0254 | 0.0273 | 0.0273 | 0.0256   | 0.2650 | 0.2358   |
| (0.35, 0.65) | 0.0480 | 0.0498 | 0.0236 | 0.0257 | 0.0257 | 0.0235   | 0.2640 | 0.2347   |
| (0.4, 0.6)   | 0.0401 | 0.0431 | 0.0222 | 0.0296 | 0.0296 | 0.0220   | 0.2654 | 0.2361   |
| (0.45, 0.55) | 0.0551 | 0.0558 | 0.0243 | 0.0295 | 0.0295 | 0.0246   | 0.1838 | 0.1551   |
| (0.5, 0.5)   | 0.0499 | 0.0513 | 0.0308 | 0.0319 | 0.0319 | 0.0309   | 0.1472 | 0.1202   |
| (0.55, 0.45) | 0.0538 | 0.0542 | 0.0278 | 0.0329 | 0.0329 | 0.0280   | 0.1717 | 0.1459   |
| (0.6, 0.4)   | 0.0476 | 0.0484 | 0.0258 | 0.0298 | 0.0298 | 0.0259   | 0.2434 | 0.2147   |
| (0.65, 0.35) | 0.0447 | 0.0474 | 0.0287 | 0.0332 | 0.0332 | 0.0288   | 0.2632 | 0.2340   |
| (0.7, 0.3)   | 0.0388 | 0.0397 | 0.0295 | 0.0328 | 0.0328 | 0.0296   | 0.2659 | 0.2367   |
| (0.75, 0.25) | 0.0336 | 0.0399 | 0.0241 | 0.0293 | 0.0293 | 0.0244   | 0.2612 | 0.2320   |
| (0.8, 0.2)   | 0.0407 | 0.0447 | 0.0266 | 0.0303 | 0.0303 | 0.0271   | 0.2601 | 0.2309   |
| (0.85, 0.15) | 0.0383 | 0.0423 | 0.0219 | 0.0278 | 0.0278 | 0.0220   | 0.2670 | 0.2378   |
| (0.9, 0.1)   | 0.0351 | 0.0387 | 0.0244 | 0.0275 | 0.0275 | 0.0245   | 0.2618 | 0.2326   |
| (0.95, 0.05) | 0.0238 | 0.0263 | 0.0269 | 0.0296 | 0.0296 | 0.0272   | 0.2602 | 0.2310   |
| (1.0, 0.0)   | 0.0118 | 0.0202 | 0.0241 | 0.0279 | 0.0279 | 0.0244   | 0.2571 | 0.2279   |

| prev         | bin    | mul    | kfcv   | atc_mc | atc_ne |
|--------------|--------|--------|--------|--------|--------|
| (0.0, 1.0)   | 0.0088 | 0.0100 | 0.0580 | 0.0183 | 0.0183 |
| (0.05, 0.95) | 0.0175 | 0.0159 | 0.0605 | 0.0193 | 0.0193 |
| (0.1, 0.9)   | 0.0184 | 0.0176 | 0.0532 | 0.0189 | 0.0189 |
| (0.15, 0.85) | 0.0188 | 0.0204 | 0.0475 | 0.0180 | 0.0180 |
| (0.2, 0.8)   | 0.0269 | 0.0266 | 0.0455 | 0.0206 | 0.0206 |
| (0.25, 0.75) | 0.0265 | 0.0261 | 0.0401 | 0.0242 | 0.0242 |
| (0.3, 0.7)   | 0.0328 | 0.0336 | 0.0331 | 0.0208 | 0.0208 |
| (0.35, 0.65) | 0.0386 | 0.0394 | 0.0307 | 0.0211 | 0.0211 |
| (0.4, 0.6)   | 0.0343 | 0.0371 | 0.0273 | 0.0265 | 0.0265 |
| (0.45, 0.55) | 0.0511 | 0.0512 | 0.0231 | 0.0275 | 0.0275 |
| (0.5, 0.5)   | 0.0517 | 0.0529 | 0.0306 | 0.0319 | 0.0319 |
| (0.55, 0.45) | 0.0584 | 0.0583 | 0.0308 | 0.0354 | 0.0354 |
| (0.6, 0.4)   | 0.0590 | 0.0599 | 0.0363 | 0.0357 | 0.0357 |
| (0.65, 0.35) | 0.0635 | 0.0662 | 0.0506 | 0.0440 | 0.0440 |
| (0.7, 0.3)   | 0.0596 | 0.0638 | 0.0654 | 0.0457 | 0.0457 |
| (0.75, 0.25) | 0.0627 | 0.0744 | 0.0964 | 0.0461 | 0.0461 |
| (0.8, 0.2)   | 0.0909 | 0.0999 | 0.1400 | 0.0629 | 0.0629 |
| (0.85, 0.15) | 0.1052 | 0.1126 | 0.1829 | 0.0727 | 0.0727 |
| (0.9, 0.1)   | 0.1377 | 0.1481 | 0.2839 | 0.1215 | 0.1215 |
| (0.95, 0.05) | 0.1305 | 0.1450 | 0.4592 | 0.2037 | 0.2037 |
| (1.0, 0.0)   | 0.1092 | 0.1387 | 0.8818 | 0.5267 | 0.5267 |
|
||||
|
||||
<div>target: default</div>
|
||||
<div>train: [0.5 0.5]</div>
|
||||
<div>validation: [0.5 0.5]</div>
|
||||
<div>evaluate_binary: 277.300s</div>
|
||||
<div>evaluate_multiclass: 139.986s</div>
|
||||
<div>kfcv: 98.625s</div>
|
||||
<div>atc_mc: 93.304s</div>
|
||||
<div>atc_ne: 91.201s</div>
|
||||
<div>doc_feat: 29.930s</div>
|
||||
<div>rca_score: 1018.341s</div>
|
||||
<div>rca_star_score: 1013.733s</div>
|
||||
<div>tot: 1054.413s</div>
|
||||
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>bin</th>
|
||||
<th>mul</th>
|
||||
<th>kfcv</th>
|
||||
<th>atc_mc</th>
|
||||
<th>atc_ne</th>
|
||||
<th>doc_feat</th>
|
||||
<th>rca</th>
|
||||
<th>rca_star</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>(0.0, 1.0)</th>
|
||||
<td>0.0154</td>
|
||||
<td>0.0177</td>
|
||||
<td>0.0249</td>
|
||||
<td>0.0291</td>
|
||||
<td>0.0291</td>
|
||||
<td>0.0248</td>
|
||||
<td>0.2705</td>
|
||||
<td>0.2413</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.05, 0.95)</th>
|
||||
<td>0.0309</td>
|
||||
<td>0.0284</td>
|
||||
<td>0.0252</td>
|
||||
<td>0.0300</td>
|
||||
<td>0.0300</td>
|
||||
<td>0.0247</td>
|
||||
<td>0.2796</td>
|
||||
<td>0.2504</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.1, 0.9)</th>
|
||||
<td>0.0309</td>
|
||||
<td>0.0302</td>
|
||||
<td>0.0251</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0250</td>
|
||||
<td>0.2722</td>
|
||||
<td>0.2430</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.15, 0.85)</th>
|
||||
<td>0.0310</td>
|
||||
<td>0.0339</td>
|
||||
<td>0.0245</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2684</td>
|
||||
<td>0.2392</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.2, 0.8)</th>
|
||||
<td>0.0411</td>
|
||||
<td>0.0407</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.0292</td>
|
||||
<td>0.0292</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.2724</td>
|
||||
<td>0.2432</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.25, 0.75)</th>
|
||||
<td>0.0381</td>
|
||||
<td>0.0376</td>
|
||||
<td>0.0262</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.2701</td>
|
||||
<td>0.2409</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.3, 0.7)</th>
|
||||
<td>0.0442</td>
|
||||
<td>0.0452</td>
|
||||
<td>0.0254</td>
|
||||
<td>0.0273</td>
|
||||
<td>0.0273</td>
|
||||
<td>0.0256</td>
|
||||
<td>0.2650</td>
|
||||
<td>0.2358</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.35, 0.65)</th>
|
||||
<td>0.0480</td>
|
||||
<td>0.0498</td>
|
||||
<td>0.0236</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.0257</td>
|
||||
<td>0.0235</td>
|
||||
<td>0.2640</td>
|
||||
<td>0.2347</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.4, 0.6)</th>
|
||||
<td>0.0401</td>
|
||||
<td>0.0431</td>
|
||||
<td>0.0222</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0220</td>
|
||||
<td>0.2654</td>
|
||||
<td>0.2361</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.45, 0.55)</th>
|
||||
<td>0.0551</td>
|
||||
<td>0.0558</td>
|
||||
<td>0.0243</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0246</td>
|
||||
<td>0.1838</td>
|
||||
<td>0.1551</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.5, 0.5)</th>
|
||||
<td>0.0499</td>
|
||||
<td>0.0513</td>
|
||||
<td>0.0308</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0319</td>
|
||||
<td>0.0309</td>
|
||||
<td>0.1472</td>
|
||||
<td>0.1202</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.55, 0.45)</th>
|
||||
<td>0.0538</td>
|
||||
<td>0.0542</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0329</td>
|
||||
<td>0.0329</td>
|
||||
<td>0.0280</td>
|
||||
<td>0.1717</td>
|
||||
<td>0.1459</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.6, 0.4)</th>
|
||||
<td>0.0476</td>
|
||||
<td>0.0484</td>
|
||||
<td>0.0258</td>
|
||||
<td>0.0298</td>
|
||||
<td>0.0298</td>
|
||||
<td>0.0259</td>
|
||||
<td>0.2434</td>
|
||||
<td>0.2147</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.65, 0.35)</th>
|
||||
<td>0.0447</td>
|
||||
<td>0.0474</td>
|
||||
<td>0.0287</td>
|
||||
<td>0.0332</td>
|
||||
<td>0.0332</td>
|
||||
<td>0.0288</td>
|
||||
<td>0.2632</td>
|
||||
<td>0.2340</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.7, 0.3)</th>
|
||||
<td>0.0388</td>
|
||||
<td>0.0397</td>
|
||||
<td>0.0295</td>
|
||||
<td>0.0328</td>
|
||||
<td>0.0328</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.2659</td>
|
||||
<td>0.2367</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.75, 0.25)</th>
|
||||
<td>0.0336</td>
|
||||
<td>0.0399</td>
|
||||
<td>0.0241</td>
|
||||
<td>0.0293</td>
|
||||
<td>0.0293</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2612</td>
|
||||
<td>0.2320</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.8, 0.2)</th>
|
||||
<td>0.0407</td>
|
||||
<td>0.0447</td>
|
||||
<td>0.0266</td>
|
||||
<td>0.0303</td>
|
||||
<td>0.0303</td>
|
||||
<td>0.0271</td>
|
||||
<td>0.2601</td>
|
||||
<td>0.2309</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.85, 0.15)</th>
|
||||
<td>0.0383</td>
|
||||
<td>0.0423</td>
|
||||
<td>0.0219</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0278</td>
|
||||
<td>0.0220</td>
|
||||
<td>0.2670</td>
|
||||
<td>0.2378</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.9, 0.1)</th>
|
||||
<td>0.0351</td>
|
||||
<td>0.0387</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.0275</td>
|
||||
<td>0.0275</td>
|
||||
<td>0.0245</td>
|
||||
<td>0.2618</td>
|
||||
<td>0.2326</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(0.95, 0.05)</th>
|
||||
<td>0.0238</td>
|
||||
<td>0.0263</td>
|
||||
<td>0.0269</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0296</td>
|
||||
<td>0.0272</td>
|
||||
<td>0.2602</td>
|
||||
<td>0.2310</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>(1.0, 0.0)</th>
|
||||
<td>0.0118</td>
|
||||
<td>0.0202</td>
|
||||
<td>0.0241</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0279</td>
|
||||
<td>0.0244</td>
|
||||
<td>0.2571</td>
|
||||
<td>0.2279</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0088</td><td>0.0100</td><td>0.0580</td><td>0.0183</td><td>0.0183</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0175</td><td>0.0159</td><td>0.0605</td><td>0.0193</td><td>0.0193</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0184</td><td>0.0176</td><td>0.0532</td><td>0.0189</td><td>0.0189</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0188</td><td>0.0204</td><td>0.0475</td><td>0.0180</td><td>0.0180</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0269</td><td>0.0266</td><td>0.0455</td><td>0.0206</td><td>0.0206</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0265</td><td>0.0261</td><td>0.0401</td><td>0.0242</td><td>0.0242</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0328</td><td>0.0336</td><td>0.0331</td><td>0.0208</td><td>0.0208</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0386</td><td>0.0394</td><td>0.0307</td><td>0.0211</td><td>0.0211</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0343</td><td>0.0371</td><td>0.0273</td><td>0.0265</td><td>0.0265</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0511</td><td>0.0512</td><td>0.0231</td><td>0.0275</td><td>0.0275</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0517</td><td>0.0529</td><td>0.0306</td><td>0.0319</td><td>0.0319</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0584</td><td>0.0583</td><td>0.0308</td><td>0.0354</td><td>0.0354</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0590</td><td>0.0599</td><td>0.0363</td><td>0.0357</td><td>0.0357</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0635</td><td>0.0662</td><td>0.0506</td><td>0.0440</td><td>0.0440</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0596</td><td>0.0638</td><td>0.0654</td><td>0.0457</td><td>0.0457</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0627</td><td>0.0744</td><td>0.0964</td><td>0.0461</td><td>0.0461</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0909</td><td>0.0999</td><td>0.1400</td><td>0.0629</td><td>0.0629</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.1052</td><td>0.1126</td><td>0.1829</td><td>0.0727</td><td>0.0727</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.1377</td><td>0.1481</td><td>0.2839</td><td>0.1215</td><td>0.1215</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.1305</td><td>0.1450</td><td>0.4592</td><td>0.2037</td><td>0.2037</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.1092</td><td>0.1387</td><td>0.8818</td><td>0.5267</td><td>0.5267</td></tr>
  </tbody>
</table>
out_rcv1.md (34710 lines changed): file diff suppressed because it is too large.

out_spambase.md (890 lines changed):

@@ -1,445 +1,445 @@
<div>target: default</div>
<div>train: [0.60621118 0.39378882]</div>
<div>validation: [0.60559006 0.39440994]</div>
<div>evaluate_binary: 31.883s</div>
<div>evaluate_multiclass: 24.748s</div>
<div>kfcv: 23.957s</div>
<div>atc_mc: 36.062s</div>
<div>atc_ne: 37.123s</div>
<div>doc_feat: 7.063s</div>
<div>rca_score: 148.420s</div>
<div>rca_star_score: 145.690s</div>
<div>tot: 149.118s</div>

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th><th>doc_feat</th><th>rca</th><th>rca_star</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0411</td><td>0.0907</td><td>0.0208</td><td>0.0267</td><td>0.0267</td><td>0.0204</td><td>0.1106</td><td>0.1059</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0392</td><td>0.0897</td><td>0.0216</td><td>0.0266</td><td>0.0266</td><td>0.0211</td><td>0.0523</td><td>0.0510</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0371</td><td>0.0891</td><td>0.0232</td><td>0.0267</td><td>0.0267</td><td>0.0227</td><td>0.0347</td><td>0.0354</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0464</td><td>0.0853</td><td>0.0226</td><td>0.0257</td><td>0.0257</td><td>0.0222</td><td>0.0315</td><td>0.0341</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0414</td><td>0.0757</td><td>0.0202</td><td>0.0249</td><td>0.0249</td><td>0.0200</td><td>0.0280</td><td>0.0302</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0468</td><td>0.0768</td><td>0.0204</td><td>0.0250</td><td>0.0250</td><td>0.0201</td><td>0.0335</td><td>0.0376</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0384</td><td>0.0739</td><td>0.0201</td><td>0.0252</td><td>0.0252</td><td>0.0200</td><td>0.0349</td><td>0.0410</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0386</td><td>0.0715</td><td>0.0198</td><td>0.0239</td><td>0.0239</td><td>0.0196</td><td>0.0376</td><td>0.0448</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0392</td><td>0.0657</td><td>0.0199</td><td>0.0249</td><td>0.0249</td><td>0.0197</td><td>0.0315</td><td>0.0391</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0380</td><td>0.0679</td><td>0.0213</td><td>0.0258</td><td>0.0258</td><td>0.0212</td><td>0.0358</td><td>0.0450</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0400</td><td>0.0670</td><td>0.0218</td><td>0.0228</td><td>0.0228</td><td>0.0217</td><td>0.0441</td><td>0.0550</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0403</td><td>0.0686</td><td>0.0203</td><td>0.0237</td><td>0.0237</td><td>0.0200</td><td>0.0398</td><td>0.0507</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0432</td><td>0.0625</td><td>0.0201</td><td>0.0245</td><td>0.0245</td><td>0.0200</td><td>0.0370</td><td>0.0487</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0384</td><td>0.0620</td><td>0.0195</td><td>0.0236</td><td>0.0236</td><td>0.0195</td><td>0.0356</td><td>0.0460</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0304</td><td>0.0570</td><td>0.0236</td><td>0.0227</td><td>0.0227</td><td>0.0236</td><td>0.0302</td><td>0.0396</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0321</td><td>0.0614</td><td>0.0187</td><td>0.0273</td><td>0.0273</td><td>0.0187</td><td>0.0332</td><td>0.0439</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0300</td><td>0.0555</td><td>0.0221</td><td>0.0230</td><td>0.0230</td><td>0.0222</td><td>0.0287</td><td>0.0340</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.0325</td><td>0.0540</td><td>0.0224</td><td>0.0229</td><td>0.0229</td><td>0.0225</td><td>0.0342</td><td>0.0360</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.0262</td><td>0.0518</td><td>0.0211</td><td>0.0238</td><td>0.0238</td><td>0.0211</td><td>0.0483</td><td>0.0469</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.0243</td><td>0.0576</td><td>0.0197</td><td>0.0240</td><td>0.0240</td><td>0.0196</td><td>0.0806</td><td>0.0746</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.0146</td><td>0.0597</td><td>0.0231</td><td>0.0244</td><td>0.0244</td><td>0.0232</td><td>0.1600</td><td>0.1515</td></tr>
  </tbody>
</table>

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;"><th></th><th>bin</th><th>mul</th><th>kfcv</th><th>atc_mc</th><th>atc_ne</th></tr>
  </thead>
  <tbody>
    <tr><th>(0.0, 1.0)</th><td>0.0239</td><td>0.0477</td><td>0.0345</td><td>0.0162</td><td>0.0162</td></tr>
    <tr><th>(0.05, 0.95)</th><td>0.0235</td><td>0.0496</td><td>0.0320</td><td>0.0169</td><td>0.0169</td></tr>
    <tr><th>(0.1, 0.9)</th><td>0.0230</td><td>0.0520</td><td>0.0289</td><td>0.0171</td><td>0.0171</td></tr>
    <tr><th>(0.15, 0.85)</th><td>0.0308</td><td>0.0528</td><td>0.0274</td><td>0.0171</td><td>0.0171</td></tr>
    <tr><th>(0.2, 0.8)</th><td>0.0286</td><td>0.0490</td><td>0.0291</td><td>0.0186</td><td>0.0186</td></tr>
    <tr><th>(0.25, 0.75)</th><td>0.0346</td><td>0.0534</td><td>0.0255</td><td>0.0186</td><td>0.0186</td></tr>
    <tr><th>(0.3, 0.7)</th><td>0.0299</td><td>0.0545</td><td>0.0232</td><td>0.0205</td><td>0.0205</td></tr>
    <tr><th>(0.35, 0.65)</th><td>0.0335</td><td>0.0566</td><td>0.0217</td><td>0.0211</td><td>0.0211</td></tr>
    <tr><th>(0.4, 0.6)</th><td>0.0360</td><td>0.0562</td><td>0.0217</td><td>0.0226</td><td>0.0226</td></tr>
    <tr><th>(0.45, 0.55)</th><td>0.0372</td><td>0.0626</td><td>0.0213</td><td>0.0246</td><td>0.0246</td></tr>
    <tr><th>(0.5, 0.5)</th><td>0.0437</td><td>0.0677</td><td>0.0223</td><td>0.0241</td><td>0.0241</td></tr>
    <tr><th>(0.55, 0.45)</th><td>0.0486</td><td>0.0762</td><td>0.0241</td><td>0.0269</td><td>0.0269</td></tr>
    <tr><th>(0.6, 0.4)</th><td>0.0572</td><td>0.0779</td><td>0.0290</td><td>0.0312</td><td>0.0312</td></tr>
    <tr><th>(0.65, 0.35)</th><td>0.0580</td><td>0.0866</td><td>0.0340</td><td>0.0341</td><td>0.0341</td></tr>
    <tr><th>(0.7, 0.3)</th><td>0.0546</td><td>0.0919</td><td>0.0420</td><td>0.0374</td><td>0.0374</td></tr>
    <tr><th>(0.75, 0.25)</th><td>0.0636</td><td>0.1161</td><td>0.0689</td><td>0.0533</td><td>0.0533</td></tr>
    <tr><th>(0.8, 0.2)</th><td>0.0750</td><td>0.1192</td><td>0.0768</td><td>0.0560</td><td>0.0560</td></tr>
    <tr><th>(0.85, 0.15)</th><td>0.1031</td><td>0.1580</td><td>0.1244</td><td>0.0728</td><td>0.0728</td></tr>
    <tr><th>(0.9, 0.1)</th><td>0.1175</td><td>0.2412</td><td>0.1885</td><td>0.1100</td><td>0.1100</td></tr>
    <tr><th>(0.95, 0.05)</th><td>0.1877</td><td>0.3434</td><td>0.3579</td><td>0.2053</td><td>0.2053</td></tr>
    <tr><th>(1.0, 0.0)</th><td>0.2717</td><td>0.3136</td><td>0.9178</td><td>0.6264</td><td>0.6264</td></tr>
  </tbody>
</table>
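These <table border="1" class="dataframe"> blocks look like the default markup pandas emits from DataFrame.to_html(). As a minimal sketch of how such a report table could be produced (the row index and column names below are illustrative, not taken from the repository):

import pandas as pd

# one row per (class 0, class 1) test prevalence pair,
# one column per accuracy-estimation method, as in the tables above
data = {
    "bin": [0.0411, 0.0392],
    "mul": [0.0907, 0.0897],
    "kfcv": [0.0208, 0.0216],
}
index = [(0.0, 1.0), (0.05, 0.95)]
df = pd.DataFrame(data, index=index)

# to_html() defaults to border="1" and class="dataframe"
print(df.to_html())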
File diff suppressed because it is too large.

@@ -1,40 +1,40 @@
[tool.poetry]
name = "quacc"
version = "0.1.0"
description = ""
authors = ["Lorenzo Volpi <lorenzo.volpi@outlook.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
quapy = "^0.1.7"
pandas = "^2.0.3"
jinja2 = "^3.1.2"
pyyaml = "^6.0.1"
logging = "^0.4.9.6"

[tool.poetry.scripts]
main = "quacc.main:main"
comp = "quacc.main:estimate_comparison"
tohost = "scp_sync:scp_sync_to_host"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pylance = "^0.5.9"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"
win11toast = "^0.32"
tabulate = "^0.9.0"
paramiko = "^3.3.1"

[tool.pytest.ini_options]
addopts = "--cov=quacc --capture=tee-sys"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[virtualenvs]
in-project = true
quacc/data.py (300 lines changed):

@@ -1,150 +1,150 @@
import math
from typing import List, Optional

import numpy as np
import scipy.sparse as sp
from quapy.data import LabelledCollection


# Extended classes
#
# 0 ~ True 0
# 1 ~ False 1
# 2 ~ False 0
# 3 ~ True 1
#  _____________________
# |          |          |
# |  True 0  | False 1  |
# |__________|__________|
# |          |          |
# | False 0  |  True 1  |
# |__________|__________|
#
class ExClassManager:
    @staticmethod
    def get_ex(n_classes: int, true_class: int, pred_class: int) -> int:
        return true_class * n_classes + pred_class

    @staticmethod
    def get_pred(n_classes: int, ex_class: int) -> int:
        return ex_class % n_classes

    @staticmethod
    def get_true(n_classes: int, ex_class: int) -> int:
        return ex_class // n_classes


class ExtendedCollection(LabelledCollection):
    def __init__(
        self,
        instances: np.ndarray | sp.csr_matrix,
        labels: np.ndarray,
        classes: Optional[List] = None,
    ):
        super().__init__(instances, labels, classes=classes)

    def split_by_pred(self):
        _ncl = int(math.sqrt(self.n_classes))
        _indexes = ExtendedCollection._split_index_by_pred(_ncl, self.instances)
        if isinstance(self.instances, np.ndarray):
            _instances = [
                self.instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(self.instances, sp.csr_matrix):
            _instances = [
                self.instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        _labels = [
            np.asarray(
                [
                    ExClassManager.get_true(_ncl, lbl)
                    for lbl in (self.labels[ind] if len(ind) > 0 else [])
                ],
                dtype=int,
            )
            for ind in _indexes
        ]
        return [
            ExtendedCollection(inst, lbl, classes=range(0, _ncl))
            for (inst, lbl) in zip(_instances, _labels)
        ]

    @classmethod
    def split_inst_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> (List[np.ndarray | sp.csr_matrix], List[float]):
        _indexes = cls._split_index_by_pred(n_classes, instances)
        if isinstance(instances, np.ndarray):
            _instances = [
                instances[ind] if ind.shape[0] > 0 else np.asarray([], dtype=int)
                for ind in _indexes
            ]
        elif isinstance(instances, sp.csr_matrix):
            _instances = [
                instances[ind]
                if ind.shape[0] > 0
                else sp.csr_matrix(np.empty((0, 0), dtype=int))
                for ind in _indexes
            ]
        norms = [inst.shape[0] / instances.shape[0] for inst in _instances]
        return _instances, norms

    @classmethod
    def _split_index_by_pred(
        cls, n_classes: int, instances: np.ndarray | sp.csr_matrix
    ) -> List[np.ndarray]:
        if isinstance(instances, np.ndarray):
            _pred_label = [np.argmax(inst[-n_classes:], axis=0) for inst in instances]
        elif isinstance(instances, sp.csr_matrix):
            _pred_label = [
                np.argmax(inst[:, -n_classes:].toarray().flatten(), axis=0)
                for inst in instances
            ]
        else:
            raise ValueError("Unsupported matrix format")

        return [
            np.asarray([j for (j, x) in enumerate(_pred_label) if x == i], dtype=int)
            for i in range(0, n_classes)
        ]

    @classmethod
    def extend_instances(
        cls, instances: np.ndarray | sp.csr_matrix, pred_proba: np.ndarray
    ) -> np.ndarray | sp.csr_matrix:
        if isinstance(instances, sp.csr_matrix):
            _pred_proba = sp.csr_matrix(pred_proba)
            n_x = sp.hstack([instances, _pred_proba])
        elif isinstance(instances, np.ndarray):
            n_x = np.concatenate((instances, pred_proba), axis=1)
        else:
            raise ValueError("Unsupported matrix format")

        return n_x

    @classmethod
    def extend_collection(
        cls,
        base: LabelledCollection,
        pred_proba: np.ndarray,
    ):
        n_classes = base.n_classes

        # n_X = [ X | predicted probs. ]
        n_x = cls.extend_instances(base.X, pred_proba)

        # n_y = (expected y, predicted y)
        pred_proba = pred_proba[:, -n_classes:]
        preds = np.argmax(pred_proba, axis=-1)
        n_y = np.asarray(
            [
                ExClassManager.get_ex(n_classes, true_class, pred_class)
                for (true_class, pred_class) in zip(base.y, preds)
            ]
        )

        return ExtendedCollection(n_x, n_y, classes=[*range(0, n_classes * n_classes)])
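A quick sanity check of the extended-class encoding defined above, assuming the binary case (n_classes=2); the loop below is illustrative, not part of the repository:

# extended class = true_class * n_classes + pred_class, so for n_classes=2:
# 0 -> (true 0, pred 0), 1 -> (true 0, pred 1),
# 2 -> (true 1, pred 0), 3 -> (true 1, pred 1)
for true_class in (0, 1):
    for pred_class in (0, 1):
        ex = ExClassManager.get_ex(2, true_class, pred_class)
        assert ExClassManager.get_true(2, ex) == true_class
        assert ExClassManager.get_pred(2, ex) == pred_class
        print(f"true={true_class} pred={pred_class} -> ex={ex}")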
quacc/dataset.py (342 lines changed):

@@ -1,171 +1,171 @@
import math
from typing import List

import numpy as np
import quapy as qp
from quapy.data.base import LabelledCollection
from sklearn.conftest import fetch_rcv1

TRAIN_VAL_PROP = 0.5


class DatasetSample:
    def __init__(
        self,
        train: LabelledCollection,
        validation: LabelledCollection,
        test: LabelledCollection,
    ):
        self.train = train
        self.validation = validation
        self.test = test

    @property
    def train_prev(self):
        return self.train.prevalence()

    @property
    def validation_prev(self):
        return self.validation.prevalence()

    @property
    def prevs(self):
        return {"train": self.train_prev, "validation": self.validation_prev}


class Dataset:
    def __init__(self, name, n_prevalences=9, prevs=None, target=None):
        self._name = name
        self._target = target

        self.prevs = None
        self.n_prevs = n_prevalences
        if prevs is not None:
            prevs = np.unique([p for p in prevs if p > 0.0 and p < 1.0])
            if prevs.shape[0] > 0:
                self.prevs = np.sort(prevs)
                self.n_prevs = self.prevs.shape[0]

    def __spambase(self):
        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test

    # try min_df=5
    def __imdb(self):
        return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test

    def __rcv1(self):
        n_train = 23149
        available_targets = ["CCAT", "GCAT", "MCAT"]

        if self._target is None or self._target not in available_targets:
            raise ValueError(f"Invalid target {self._target}")

        dataset = fetch_rcv1()
        target_index = np.where(dataset.target_names == self._target)[0]
        all_train_d = dataset.data[:n_train, :]
        test_d = dataset.data[n_train:, :]
        labels = dataset.target[:, target_index].toarray().flatten()
        all_train_l, test_l = labels[:n_train], labels[n_train:]
        all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])
        test = LabelledCollection(test_d, test_l, classes=[0, 1])

        return all_train, test

    def get_raw(self) -> DatasetSample:
        all_train, test = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        train, val = all_train.split_stratified(
            train_prop=TRAIN_VAL_PROP, random_state=0
        )

        return DatasetSample(train, val, test)

    def get(self) -> List[DatasetSample]:
        (all_train, test) = {
            "spambase": self.__spambase,
            "imdb": self.__imdb,
            "rcv1": self.__rcv1,
        }[self._name]()

        # resample all_train set to have (0.5, 0.5) prevalence
        at_positives = np.sum(all_train.y)
        all_train = all_train.sampling(
            min(at_positives, len(all_train) - at_positives) * 2, 0.5, random_state=0
        )

        # sample prevalences
        if self.prevs is not None:
            prevs = self.prevs
        else:
            prevs = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]

        at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevs)
        datasets = []
        for p in 1.0 - prevs:
            all_train_sampled = all_train.sampling(at_size, p, random_state=0)
            train, validation = all_train_sampled.split_stratified(
                train_prop=TRAIN_VAL_PROP, random_state=0
            )
            datasets.append(DatasetSample(train, validation, test))

        return datasets

    def __call__(self):
        return self.get()

    @property
    def name(self):
        return (
            f"{self._name}_{self._target}_{self.n_prevs}prevs"
            if self._name == "rcv1"
            else f"{self._name}_{self.n_prevs}prevs"
        )


# >>> fetch_rcv1().target_names
# array(['C11', 'C12', 'C13', 'C14', 'C15', 'C151', 'C1511', 'C152', 'C16',
#        'C17', 'C171', 'C172', 'C173', 'C174', 'C18', 'C181', 'C182',
#        'C183', 'C21', 'C22', 'C23', 'C24', 'C31', 'C311', 'C312', 'C313',
#        'C32', 'C33', 'C331', 'C34', 'C41', 'C411', 'C42', 'CCAT', 'E11',
#        'E12', 'E121', 'E13', 'E131', 'E132', 'E14', 'E141', 'E142',
#        'E143', 'E21', 'E211', 'E212', 'E31', 'E311', 'E312', 'E313',
#        'E41', 'E411', 'E51', 'E511', 'E512', 'E513', 'E61', 'E71', 'ECAT',
#        'G15', 'G151', 'G152', 'G153', 'G154', 'G155', 'G156', 'G157',
#        'G158', 'G159', 'GCAT', 'GCRIM', 'GDEF', 'GDIP', 'GDIS', 'GENT',
#        'GENV', 'GFAS', 'GHEA', 'GJOB', 'GMIL', 'GOBIT', 'GODD', 'GPOL',
#        'GPRO', 'GREL', 'GSCI', 'GSPO', 'GTOUR', 'GVIO', 'GVOTE', 'GWEA',
#        'GWELF', 'M11', 'M12', 'M13', 'M131', 'M132', 'M14', 'M141',
#        'M142', 'M143', 'MCAT'], dtype=object)


def rcv1_info():
    dataset = fetch_rcv1()
    n_train = 23149

    targets = []
    for target in range(103):
        train_t_prev = np.average(dataset.target[:n_train, target].toarray().flatten())
        test_t_prev = np.average(dataset.target[n_train:, target].toarray().flatten())
        targets.append(
            (
                dataset.target_names[target],
                {
                    "train": (1.0 - train_t_prev, train_t_prev),
                    "test": (1.0 - test_t_prev, test_t_prev),
                },
            )
        )

    targets.sort(key=lambda t: t[1]["train"][1])
    for n, d in targets:
        print(f"{n}:")
        for k, (fp, tp) in d.items():
            print(f"\t{k}: {fp:.4f}, {tp:.4f}")


if __name__ == "__main__":
    rcv1_info()
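A minimal usage sketch for the Dataset class above; the import path follows the file location (quacc/dataset.py) and the prevalence values are illustrative:

from quacc.dataset import Dataset

# spambase with the default grid of 9 training prevalences
for sample in Dataset("spambase").get():
    print(sample.train_prev, sample.validation_prev)

# rcv1 requires one of the supported targets; explicit prevalences
rcv1 = Dataset("rcv1", target="CCAT", prevs=[0.2, 0.5, 0.8])
print(rcv1.name)  # -> rcv1_CCAT_3prevs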
@@ -1,118 +1,118 @@
import collections as C
import copy
from typing import Any

import yaml


class environ:
    _instance = None
    _default_env = {
        "DATASET_NAME": None,
        "DATASET_TARGET": None,
        "METRICS": [],
        "COMP_ESTIMATORS": [],
        "DATASET_N_PREVS": 9,
        "DATASET_PREVS": None,
        "OUT_DIR_NAME": "output",
        "OUT_DIR": None,
        "PLOT_DIR_NAME": "plot",
        "PLOT_OUT_DIR": None,
        "DATASET_DIR_UPDATE": False,
        "PROTOCOL_N_PREVS": 21,
        "PROTOCOL_REPEATS": 100,
        "SAMPLE_SIZE": 1000,
        "PLOT_ESTIMATORS": [],
        "PLOT_STDEV": False,
    }
    _keys = list(_default_env.keys())

    def __init__(self):
        self.exec = []
        self.confs = []
        self.load_conf()
        self._stack = C.deque([self.__getdict()])

    def __setdict(self, d):
        for k, v in d.items():
            super().__setattr__(k, v)

    def __getdict(self):
        return {k: self.__getattribute__(k) for k in environ._keys}

    def __setattr__(self, __name: str, __value: Any) -> None:
        if __name in environ._keys:
            self._stack[-1][__name] = __value
        super().__setattr__(__name, __value)

    def load_conf(self):
        self.__setdict(environ._default_env)

        with open("conf.yaml", "r") as f:
            confs = yaml.safe_load(f)["exec"]

        _global = confs["global"]
        _estimators = set()
        for pc in confs["plot_confs"].values():
            _estimators = _estimators.union(set(pc["PLOT_ESTIMATORS"]))
        _global["COMP_ESTIMATORS"] = list(_estimators)

        self.__setdict(_global)

        self.confs = confs["confs"]
        self.plot_confs = confs["plot_confs"]

    def get_confs(self):
        self._stack.append(None)
        for _conf in self.confs:
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(_conf)
            self._stack.append(self.__getdict())

            yield copy.deepcopy(self._stack[-1])

        self._stack.pop()

    def get_plot_confs(self):
        self._stack.append(None)
        for k, pc in self.plot_confs.items():
            self._stack.pop()
            self.__setdict(self._stack[-1])
            self.__setdict(pc)
            self._stack.append(self.__getdict())

            name = self.DATASET_NAME
            if self.DATASET_TARGET is not None:
                name += f"_{self.DATASET_TARGET}"
            name += f"_{k}"
            yield name

        self._stack.pop()

    @property
    def current(self):
        return copy.deepcopy(self.__getdict())


env = environ()

if __name__ == "__main__":
    stack = C.deque()
    stack.append(-1)

    def __gen(stack: C.deque):
        stack.append(None)
        for i in range(5):
            stack.pop()
            stack.append(i)
            yield stack[-1]

        stack.pop()

    print(stack)

    for i in __gen(stack):
        print(stack, i)

    print(stack)
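load_conf above reads conf.yaml and expects an "exec" mapping with "global", "confs" and "plot_confs" keys, where each plot conf carries a PLOT_ESTIMATORS list. A minimal sketch of what yaml.safe_load(f) should return for that code to work (all values illustrative, not taken from the repository):

# hypothetical parsed conf.yaml, shaped as load_conf expects
{
    "exec": {
        "global": {"DATASET_NAME": "spambase", "SAMPLE_SIZE": 1000},
        "confs": [{"DATASET_N_PREVS": 9}],
        "plot_confs": {
            "default": {"PLOT_ESTIMATORS": ["bin", "mul", "kfcv"], "PLOT_STDEV": False},
        },
    }
}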
quacc/error.py (110 lines changed):

@@ -1,55 +1,55 @@
import numpy as np


def from_name(err_name):
    assert err_name in ERROR_NAMES, f"unknown error {err_name}"
    callable_error = globals()[err_name]
    return callable_error


# def f1(prev):
#     # https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure
#     if prev[0] == 0 and prev[1] == 0 and prev[2] == 0:
#         return 1.0
#     elif prev[0] == 0 and prev[1] > 0 and prev[2] == 0:
#         return 0.0
#     elif prev[0] == 0 and prev[1] == 0 and prev[2] > 0:
#         return float('NaN')
#     else:
#         recall = prev[0] / (prev[0] + prev[1])
#         precision = prev[0] / (prev[0] + prev[2])
#         return 2 * (precision * recall) / (precision + recall)


def f1(prev):
    den = (2 * prev[3]) + prev[1] + prev[2]
    if den == 0:
        return 0.0
    else:
        return (2 * prev[3]) / den


def f1e(prev):
    return 1 - f1(prev)


def acc(prev: np.ndarray) -> float:
    return (prev[0] + prev[3]) / np.sum(prev)


def accd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> np.ndarray:
    vacc = np.vectorize(acc, signature="(m)->()")
    a_tp = vacc(true_prevs)
    a_ep = vacc(estim_prevs)
    return np.abs(a_tp - a_ep)


def maccd(true_prevs: np.ndarray, estim_prevs: np.ndarray) -> float:
    return accd(true_prevs, estim_prevs).mean()


ACCURACY_ERROR = {maccd}
ACCURACY_ERROR_SINGLE = {accd}
ACCURACY_ERROR_NAMES = {func.__name__ for func in ACCURACY_ERROR}
ACCURACY_ERROR_SINGLE_NAMES = {func.__name__ for func in ACCURACY_ERROR_SINGLE}
ERROR_NAMES = ACCURACY_ERROR_NAMES | ACCURACY_ERROR_SINGLE_NAMES
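A worked example of the metrics above on a single extended prevalence vector, ordered (true 0, false 1, false 0, true 1) as in the encoding from quacc/data.py; the numbers are made up:

import numpy as np

prev = np.asarray([0.45, 0.05, 0.10, 0.40])  # (true 0, false 1, false 0, true 1)
print(acc(prev))  # (0.45 + 0.40) / 1.0 = 0.85
print(f1(prev))   # 2*0.40 / (2*0.40 + 0.05 + 0.10) ~= 0.8421
print(f1e(prev))  # 1 - f1 ~= 0.1579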
@@ -1,34 +1,34 @@
from typing import Callable, Union

import numpy as np
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol

import quacc as qc

from ..method.base import BaseAccuracyEstimator


def evaluate(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
    error_metric: Union[Callable | str],
) -> float:
    if isinstance(error_metric, str):
        error_metric = qc.error.from_name(error_metric)

    collator_bck_ = protocol.collator
    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

    estim_prevs, true_prevs = [], []
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        estim_prevs.append(estim_prev)
        true_prevs.append(e_sample.prevalence())

    protocol.collator = collator_bck_

    true_prevs = np.array(true_prevs)
    estim_prevs = np.array(estim_prevs)

    return error_metric(true_prevs, estim_prevs)
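A usage sketch for evaluate, assuming a fitted BaseAccuracyEstimator and a quapy protocol; the variable names and parameter values below are illustrative:

from quapy.protocol import APP

# `estimator` is a fitted BaseAccuracyEstimator, `test` a LabelledCollection
protocol = APP(test, sample_size=1000, n_prevalences=21, repeats=100)
err = evaluate(estimator, protocol, error_metric="maccd")
print(f"mean accuracy-estimation error: {err:.4f}")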
@@ -1,299 +1,299 @@
from functools import wraps
from statistics import mean

import numpy as np
import sklearn.metrics as metrics
from quapy.data import LabelledCollection
from quapy.protocol import AbstractStochasticSeededProtocol
from scipy.sparse import issparse
from sklearn.base import BaseEstimator
from sklearn.model_selection import cross_validate

import baselines.atc as atc
import baselines.doc as doc
import baselines.impweight as iw
import baselines.rca as rcalib

from .report import EvaluationReport

_baselines = {}


def baseline(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _baselines[func.__name__] = wrapper

    return wrapper


@baseline
def kfcv(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    scoring = ["accuracy", "f1_macro"]
    scores = cross_validate(c_model, validation.X, validation.y, scoring=scoring)
    acc_score = mean(scores["test_accuracy"])
    f1_score = mean(scores["test_f1_macro"])

    report = EvaluationReport(name="kfcv")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        meta_acc = abs(acc_score - metrics.accuracy_score(test.y, test_preds))
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc_score=acc_score,
            f1_score=f1_score,
            acc=meta_acc,
            f1=meta_f1,
        )

    return report


@baseline
def ref(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
):
    c_model_predict = getattr(c_model, "predict")
    report = EvaluationReport(name="ref")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        report.append_row(
            test.prevalence(),
            acc_score=metrics.accuracy_score(test.y, test_preds),
            f1_score=metrics.f1_score(test.y, test_preds),
        )

    return report


@baseline
def atc_mc(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-MC baseline (Garg et al.)"""
    c_model_predict = getattr(c_model, predict_method)

    ## Load ID validation data probs and labels
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    ## score function, e.g., negative entropy or argmax confidence
    val_scores = atc.get_max_conf(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_mc")
    for test in protocol():
        ## Load OOD test data probs
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_max_conf(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


@baseline
def atc_ne(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    """ATC-NE baseline (Garg et al.)"""
    c_model_predict = getattr(c_model, predict_method)

    ## Load ID validation data probs and labels
    val_probs, val_labels = c_model_predict(validation.X), validation.y

    ## score function, e.g., negative entropy or argmax confidence
    val_scores = atc.get_entropy(val_probs)
    val_preds = np.argmax(val_probs, axis=-1)
    _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)

    report = EvaluationReport(name="atc_ne")
    for test in protocol():
        ## Load OOD test data probs
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = atc.get_entropy(test_probs)
        atc_accuracy = atc.get_ATC_acc(atc_thres, test_scores)
        meta_acc = abs(atc_accuracy - metrics.accuracy_score(test.y, test_preds))
        f1_score = atc.get_ATC_f1(atc_thres, test_scores, test_probs)
        meta_f1 = abs(f1_score - metrics.f1_score(test.y, test_preds))
        report.append_row(
            test.prevalence(),
            acc=meta_acc,
            acc_score=atc_accuracy,
            f1_score=f1_score,
            f1=meta_f1,
        )

    return report


@baseline
def doc_feat(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict_proba",
):
    c_model_predict = getattr(c_model, predict_method)

    val_probs, val_labels = c_model_predict(validation.X), validation.y
    val_scores = np.max(val_probs, axis=-1)
    val_preds = np.argmax(val_probs, axis=-1)
    v1acc = np.mean(val_preds == val_labels) * 100

    report = EvaluationReport(name="doc_feat")
    for test in protocol():
        test_probs = c_model_predict(test.X)
        test_preds = np.argmax(test_probs, axis=-1)
        test_scores = np.max(test_probs, axis=-1)
        score = (v1acc + doc.get_doc(val_scores, test_scores)) / 100.0
        meta_acc = abs(score - metrics.accuracy_score(test.y, test_preds))
        report.append_row(test.prevalence(), acc=meta_acc, acc_score=score)

    return report


@baseline
def rca(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA baseline (Elsahar and Gallé, 2019)"""
    c_model_predict = getattr(c_model, predict_method)
    val_pred1 = c_model_predict(validation.X)

    report = EvaluationReport(name="rca")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val_pred2 = c_model2_predict(validation.X)
            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
            meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def rca_star(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """RCA* baseline (Elsahar and Gallé, 2019)"""
    c_model_predict = getattr(c_model, predict_method)
    validation1, validation2 = validation.split_stratified(
        train_prop=0.5, random_state=0
    )
    val1_pred = c_model_predict(validation1.X)
    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
    c_model1_predict = getattr(c_model1, predict_method)
    val2_pred1 = c_model1_predict(validation2.X)

    report = EvaluationReport(name="rca_star")
    for test in protocol():
        try:
            test_pred = c_model_predict(test.X)
            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
            c_model2_predict = getattr(c_model2, predict_method)
            val2_pred2 = c_model2_predict(validation2.X)
            rca_star_score = 1.0 - rcalib.get_score(
                val2_pred1, val2_pred2, validation2.y
            )
            meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
            report.append_row(
                test.prevalence(), acc=meta_score, acc_score=rca_star_score
            )
        except ValueError:
            report.append_row(
                test.prevalence(), acc=float("nan"), acc_score=float("nan")
            )

    return report


@baseline
def logreg(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)

    report = EvaluationReport(name="logreg")
    for test in protocol():
        wx = iw.logreg(validation.X, validation.y, test.X)
        test_preds = c_model_predict(test.X)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report


@baseline
def kdex2(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    c_model_predict = getattr(c_model, predict_method)

    val_preds = c_model_predict(validation.X)
    log_likelihood_val = iw.kdex2_lltr(validation.X)
    Xval = validation.X.toarray() if issparse(validation.X) else validation.X

    report = EvaluationReport(name="kdex2")
    for test in protocol():
        Xte = test.X.toarray() if issparse(test.X) else test.X
        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
        test_preds = c_model_predict(Xte)
        estim_acc = iw.get_acc(val_preds, validation.y, wx)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        meta_score = abs(estim_acc - true_acc)
        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)

    return report
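All baselines share the same (c_model, validation, protocol) contract, so new ones can be registered with the decorator above. A hedged sketch of a hypothetical extra baseline, written as if added inside this module:

@baseline
def val_acc(c_model, validation, protocol):
    # hypothetical: report the raw validation accuracy as the estimate for every test sample
    score = metrics.accuracy_score(validation.y, c_model.predict(validation.X))
    report = EvaluationReport(name="val_acc")
    for test in protocol():
        true_acc = metrics.accuracy_score(test.y, c_model.predict(test.X))
        report.append_row(test.prevalence(), acc=abs(score - true_acc), acc_score=score)
    return report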
@@ -1,128 +1,128 @@
import multiprocessing
import time
from traceback import print_exception as traceback
from typing import List

import numpy as np
import pandas as pd
import quapy as qp

from quacc.dataset import Dataset
from quacc.environment import env
from quacc.evaluation import baseline, method
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
from quacc.evaluation.worker import estimate_worker
from quacc.logger import Logger

pd.set_option("display.float_format", "{:.4f}".format)
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE


class CompEstimatorName_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[0]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).keys())


class CompEstimatorFunc_:
    def __init__(self, ce):
        self.ce = ce

    def __getitem__(self, e: str | List[str]):
        if isinstance(e, str):
            return self.ce._CompEstimator__get(e)[1]
        elif isinstance(e, list):
            return list(self.ce._CompEstimator__get(e).values())


class CompEstimator:
    __dict = method._methods | baseline._baselines

    # note: defined without @classmethod, so `cls` is actually the instance here;
    # the name-mangled __dict lookup still resolves through the class
    def __get(cls, e: str | List[str]):
        if isinstance(e, str):
            try:
                return (e, cls.__dict[e])
            except KeyError:
                raise KeyError(f"Invalid estimator: estimator {e} does not exist")
        elif isinstance(e, list):
            _subtr = np.setdiff1d(e, list(cls.__dict.keys()))
            if len(_subtr) > 0:
                raise KeyError(
                    f"Invalid estimator: estimator {_subtr[0]} does not exist"
                )

            e_fun = {k: fun for k, fun in cls.__dict.items() if k in e}
            if "ref" not in e:
                e_fun["ref"] = cls.__dict["ref"]

            return e_fun

    @property
    def name(self):
        return CompEstimatorName_(self)

    @property
    def func(self):
        return CompEstimatorFunc_(self)


CE = CompEstimator()


def evaluate_comparison(dataset: Dataset, estimators=None) -> EvaluationReport:
    log = Logger.logger()
    # NOTE: estimators is expected to be a non-empty list of registry names;
    # Pool(len(estimators)) raises a TypeError if the default None is passed.
    # with multiprocessing.Pool(1) as pool:
    with multiprocessing.Pool(len(estimators)) as pool:
        dr = DatasetReport(dataset.name)
        log.info(f"dataset {dataset.name}")
        for d in dataset():
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} started"
            )
            tstart = time.time()
            tasks = [
                (estim, d.train, d.validation, d.test) for estim in CE.func[estimators]
            ]
            results = [
                pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
                for t in tasks
            ]

            results_got = []
            for _r in results:
                try:
                    r = _r.get()
                    if r["result"] is not None:
                        results_got.append(r)
                except Exception as e:
                    log.warning(
                        f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                    )

            tend = time.time()
            times = {r["name"]: r["time"] for r in results_got}
            times["tot"] = tend - tstart
            log.info(
                f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s]"
            )
            try:
                cr = CompReport(
                    [r["result"] for r in results_got],
                    name=dataset.name,
                    train_prev=d.train_prev,
                    valid_prev=d.validation_prev,
                    times=times,
                )
            except Exception as e:
                log.warning(
                    f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
                )
                traceback(e)
                cr = None
            dr += cr
        return dr
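A hedged sketch of the CE accessor semantics defined above; the names are real registry keys from this diff, and the return values are inferred from __get:

CE = CompEstimator()
CE.name["kfcv"]                  # -> "kfcv"
CE.name[["kfcv", "atc_mc"]]      # -> ["kfcv", "atc_mc", "ref"]  ("ref" is always appended)
CE.func[["kfcv", "atc_mc"]]      # -> the matching callables, "ref" included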
@@ -1,305 +1,305 @@
import inspect
from functools import wraps

import numpy as np
from quapy.method.aggregative import PACC, SLD, CC
from quapy.protocol import UPP, AbstractProtocol
from sklearn.linear_model import LogisticRegression

import quacc as qc
from quacc.evaluation.report import EvaluationReport
from quacc.method.model_selection import BQAEgsq, GridSearchAE, MCAEgsq

from ..method.base import BQAE, MCAE, BaseAccuracyEstimator

_methods = {}
_sld_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "q__recalib": [None, "bcts"],
    "q__exact_train_prev": [True],
    "confidence": [None, "max_conf", "entropy"],
}
_pacc_param_grid = {
    "q__classifier__C": np.logspace(-3, 3, 7),
    "q__classifier__class_weight": [None, "balanced"],
    "confidence": [None, "max_conf", "entropy"],
}


def method(func):
    @wraps(func)
    def wrapper(c_model, validation, protocol):
        return func(c_model, validation, protocol)

    _methods[func.__name__] = wrapper

    return wrapper


def evaluation_report(
    estimator: BaseAccuracyEstimator,
    protocol: AbstractProtocol,
) -> EvaluationReport:
    method_name = inspect.stack()[1].function
    report = EvaluationReport(name=method_name)
    for sample in protocol():
        e_sample = estimator.extend(sample)
        estim_prev = estimator.estimate(e_sample.X, ext=True)
        acc_score = qc.error.acc(estim_prev)
        f1_score = qc.error.f1(estim_prev)
        report.append_row(
            sample.prevalence(),
            acc_score=acc_score,
            acc=abs(qc.error.acc(e_sample.prevalence()) - acc_score),
            f1_score=f1_score,
            f1=abs(qc.error.f1(e_sample.prevalence()) - f1_score),
        )

    return report


@method
def bin_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, SLD(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="max_conf",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_sld(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(
        c_model,
        SLD(LogisticRegression()),
        confidence="entropy",
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, SLD(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_sld_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=True,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = BQAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_sld_gsq(c_model, validation, protocol) -> EvaluationReport:
    est = MCAEgsq(
        c_model,
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts", "vs"],
        },
        refit=False,
        verbose=False,
    ).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulmc_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="max_conf").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def binne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mulne_pacc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, PACC(LogisticRegression()), confidence="entropy").fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = BQAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_pacc_gs(c_model, validation, protocol) -> EvaluationReport:
    v_train, v_val = validation.split_stratified(0.6, random_state=0)
    model = MCAE(c_model, PACC(LogisticRegression()))
    est = GridSearchAE(
        model=model,
        param_grid=_pacc_param_grid,
        refit=False,
        protocol=UPP(v_val, repeats=100),
        verbose=False,
    ).fit(v_train)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def bin_cc(c_model, validation, protocol) -> EvaluationReport:
    est = BQAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )


@method
def mul_cc(c_model, validation, protocol) -> EvaluationReport:
    est = MCAE(c_model, CC(LogisticRegression())).fit(validation)
    return evaluation_report(
        estimator=est,
        protocol=protocol,
    )
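Each decorated function lands in the _methods registry keyed by its own name, which is how comp.CompEstimator picks methods up alongside the baselines. A hedged sketch of direct use, with c_model, validation and protocol prepared as elsewhere in this diff:

from quacc.evaluation import method

fn = method._methods["bin_sld"]             # registered by the @method decorator
report = fn(c_model, validation, protocol)  # -> EvaluationReport named "bin_sld"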
File diff suppressed because it is too large
@@ -1,44 +1,44 @@
import time
from traceback import print_exception as traceback

import quapy as qp
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

from quacc.logger import SubLogger


def estimate_worker(_estimate, train, validation, test, _env=None, q=None):
    qp.environ["SAMPLE_SIZE"] = _env.SAMPLE_SIZE
    SubLogger.setup(q)
    log = SubLogger.logger()

    model = LogisticRegression()

    model.fit(*train.Xy)
    protocol = APP(
        test,
        n_prevalences=_env.PROTOCOL_N_PREVS,
        repeats=_env.PROTOCOL_REPEATS,
        return_type="labelled_collection",
    )
    start = time.time()
    try:
        result = _estimate(model, validation, protocol)
    except Exception as e:
        log.warning(f"Method {_estimate.__name__} failed. Exception: {e}")
        traceback(e)
        return {
            "name": _estimate.__name__,
            "result": None,
            "time": 0,
        }

    end = time.time()
    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": end - start,
    }
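This is the unit of work dispatched from comp.evaluate_comparison; a hedged sketch of a single dispatch, with pool, d, env and Logger bound as in that module:

res = pool.apply_async(
    estimate_worker,
    (CE.func["kfcv"], d.train, d.validation, d.test),
    {"_env": env, "q": Logger.queue()},
)
out = res.get()  # {"name": ..., "result": EvaluationReport or None, "time": ...}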
272  quacc/logger.py
@@ -1,136 +1,136 @@
import logging
import logging.handlers
import multiprocessing
import threading
from pathlib import Path


class Logger:
    __logger_file = "quacc.log"
    __logger_name = "queue_logger"
    __manager = None
    __queue = None
    __thread = None
    __setup = False
    __handlers = []

    @classmethod
    def __logger_listener(cls, q):
        while True:
            record = q.get()
            if record is None:
                break
            root = logging.getLogger("listener")
            root.handle(record)

    @classmethod
    def setup(cls):
        if cls.__setup:
            return

        # setup root
        root = logging.getLogger("listener")
        root.setLevel(logging.DEBUG)
        rh = logging.FileHandler(cls.__logger_file, mode="a")
        rh.setLevel(logging.DEBUG)
        root.addHandler(rh)

        # setup logger
        if cls.__manager is None:
            cls.__manager = multiprocessing.Manager()

        if cls.__queue is None:
            cls.__queue = cls.__manager.Queue()

        logger = logging.getLogger(cls.__logger_name)
        logger.setLevel(logging.DEBUG)
        qh = logging.handlers.QueueHandler(cls.__queue)
        qh.setLevel(logging.DEBUG)
        qh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-8s %(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        logger.addHandler(qh)

        # start listener
        cls.__thread = threading.Thread(
            target=cls.__logger_listener,
            args=(cls.__queue,),
        )
        cls.__thread.start()

        cls.__setup = True

    @classmethod
    def add_handler(cls, path: Path):
        root = logging.getLogger("listener")
        rh = logging.FileHandler(path, mode="a")
        rh.setLevel(logging.DEBUG)
        cls.__handlers.append(rh)
        root.addHandler(rh)

    @classmethod
    def clear_handlers(cls):
        root = logging.getLogger("listener")
        for h in cls.__handlers:
            root.removeHandler(h)
        cls.__handlers.clear()

    @classmethod
    def queue(cls):
        if not cls.__setup:
            cls.setup()

        return cls.__queue

    @classmethod
    def logger(cls):
        if not cls.__setup:
            cls.setup()

        return logging.getLogger(cls.__logger_name)

    @classmethod
    def close(cls):
        if cls.__setup and cls.__thread is not None:
            root = logging.getLogger("listener")
            root.info("-" * 100)
            cls.__queue.put(None)
            cls.__thread.join()
            # cls.__manager.close()


class SubLogger:
    __queue = None
    __setup = False

    @classmethod
    def setup(cls, q):
        if cls.__setup:
            return

        cls.__queue = q

        # setup root
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        rh = logging.handlers.QueueHandler(q)
        rh.setLevel(logging.DEBUG)
        rh.setFormatter(
            logging.Formatter(
                fmt="%(asctime)s| %(levelname)-12s%(message)s",
                datefmt="%d/%m/%y %H:%M:%S",
            )
        )
        root.addHandler(rh)

        cls.__setup = True

    @classmethod
    def logger(cls):
        if not cls.__setup:
            return None

        return logging.getLogger()
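A hedged sketch of the intended lifecycle: the main process logs through the queue, worker processes attach via SubLogger.setup(q), and close() stops the listener thread with the None sentinel. The log path below is hypothetical:

from pathlib import Path

log = Logger.logger()                     # lazily runs setup() and starts the listener thread
log.info("run started")
Logger.add_handler(Path("out/demo.log"))  # mirror records to a per-dataset file
Logger.clear_handlers()
Logger.close()                            # sends None through the queue and joins the thread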
150  quacc/main.py
@@ -1,75 +1,75 @@
from sys import platform
from traceback import print_exception as traceback

import quacc.evaluation.comp as comp
from quacc.dataset import Dataset
from quacc.environment import env
from quacc.logger import Logger
from quacc.utils import create_dataser_dir

CE = comp.CompEstimator()


def toast():
    if platform == "win32":
        import win11toast

        win11toast.notify("Comp", "Completed Execution")


def estimate_comparison():
    log = Logger.logger()
    for conf in env.get_confs():
        dataset = Dataset(
            env.DATASET_NAME,
            target=env.DATASET_TARGET,
            n_prevalences=env.DATASET_N_PREVS,
            prevs=env.DATASET_PREVS,
        )
        create_dataser_dir(dataset.name, update=env.DATASET_DIR_UPDATE)
        Logger.add_handler(env.OUT_DIR / f"{dataset.name}.log")
        try:
            dr = comp.evaluate_comparison(
                dataset,
                estimators=CE.name[env.COMP_ESTIMATORS],
            )
        except Exception as e:
            log.error(f"Evaluation over {dataset.name} failed. Exception: {e}")
            traceback(e)
            Logger.clear_handlers()
            continue  # dr is undefined on failure; skip the plotting phase
        for plot_conf in env.get_plot_confs():
            for m in env.METRICS:
                output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
                try:
                    _repr = dr.to_md(
                        conf=plot_conf,
                        metric=m,
                        estimators=CE.name[env.PLOT_ESTIMATORS],
                        stdev=env.PLOT_STDEV,
                    )
                    with open(output_path, "w") as f:
                        f.write(_repr)
                except Exception as e:
                    log.error(
                        f"Failed while saving configuration {plot_conf} of {dataset.name}. Exception: {e}"
                    )
                    traceback(e)
        Logger.clear_handlers()

    # print(df.to_latex(float_format="{:.4f}".format))
    # print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))


def main():
    log = Logger.logger()
    try:
        estimate_comparison()
    except Exception as e:
        log.error(f"estimate comparison failed. Exception: {e}")
        traceback(e)

    toast()
    Logger.close()


if __name__ == "__main__":
    main()
|
||||
from sys import platform
|
||||
from traceback import print_exception as traceback
|
||||
|
||||
import quacc.evaluation.comp as comp
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.environment import env
|
||||
from quacc.logger import Logger
|
||||
from quacc.utils import create_dataser_dir
|
||||
|
||||
CE = comp.CompEstimator()
|
||||
|
||||
|
||||
def toast():
|
||||
if platform == "win32":
|
||||
import win11toast
|
||||
|
||||
win11toast.notify("Comp", "Completed Execution")
|
||||
|
||||
|
||||
def estimate_comparison():
|
||||
log = Logger.logger()
|
||||
for conf in env.get_confs():
|
||||
dataset = Dataset(
|
||||
env.DATASET_NAME,
|
||||
target=env.DATASET_TARGET,
|
||||
n_prevalences=env.DATASET_N_PREVS,
|
||||
prevs=env.DATASET_PREVS,
|
||||
)
|
||||
create_dataser_dir(dataset.name, update=env.DATASET_DIR_UPDATE)
|
||||
Logger.add_handler(env.OUT_DIR / f"{dataset.name}.log")
|
||||
try:
|
||||
dr = comp.evaluate_comparison(
|
||||
dataset,
|
||||
estimators=CE.name[env.COMP_ESTIMATORS],
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(f"Evaluation over {dataset.name} failed. Exception: {e}")
|
||||
traceback(e)
|
||||
for plot_conf in env.get_plot_confs():
|
||||
for m in env.METRICS:
|
||||
output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
|
||||
try:
|
||||
_repr = dr.to_md(
|
||||
conf=plot_conf,
|
||||
metric=m,
|
||||
estimators=CE.name[env.PLOT_ESTIMATORS],
|
||||
stdev=env.PLOT_STDEV,
|
||||
)
|
||||
with open(output_path, "w") as f:
|
||||
f.write(_repr)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed while saving configuration {plot_conf} of {dataset.name}. Exception: {e}"
|
||||
)
|
||||
traceback(e)
|
||||
Logger.clear_handlers()
|
||||
|
||||
# print(df.to_latex(float_format="{:.4f}".format))
|
||||
# print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))
|
||||
|
||||
|
||||
def main():
|
||||
log = Logger.logger()
|
||||
try:
|
||||
estimate_comparison()
|
||||
except Exception as e:
|
||||
log.error(f"estimate comparison failed. Exceprion: {e}")
|
||||
traceback(e)
|
||||
|
||||
toast()
|
||||
Logger.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,120 +1,120 @@
|
|||
from copy import deepcopy
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
import win11toast
|
||||
from quapy.method.aggregative import SLD
|
||||
from quapy.protocol import APP, UPP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quacc as qc
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.error import acc
|
||||
from quacc.evaluation.baseline import ref
|
||||
from quacc.evaluation.method import mulmc_sld
|
||||
from quacc.evaluation.report import CompReport, EvaluationReport
|
||||
from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator
|
||||
from quacc.method.model_selection import GridSearchAE
|
||||
|
||||
|
||||
def test_gs():
|
||||
d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw()
|
||||
|
||||
classifier = LogisticRegression()
|
||||
classifier.fit(*d.train.Xy)
|
||||
|
||||
quantifier = SLD(LogisticRegression())
|
||||
# estimator = MultiClassAccuracyEstimator(classifier, quantifier)
|
||||
estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier)
|
||||
|
||||
v_train, v_val = d.validation.split_stratified(0.6, random_state=0)
|
||||
gs_protocol = UPP(v_val, sample_size=1000, repeats=100)
|
||||
gs_estimator = GridSearchAE(
|
||||
model=deepcopy(estimator),
|
||||
param_grid={
|
||||
"q__classifier__C": np.logspace(-3, 3, 7),
|
||||
"q__classifier__class_weight": [None, "balanced"],
|
||||
"q__recalib": [None, "bcts", "ts"],
|
||||
},
|
||||
refit=False,
|
||||
protocol=gs_protocol,
|
||||
verbose=True,
|
||||
).fit(v_train)
|
||||
|
||||
estimator.fit(d.validation)
|
||||
|
||||
tstart = time()
|
||||
erb, ergs = EvaluationReport("base"), EvaluationReport("gs")
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
n_prevalences=21,
|
||||
repeats=100,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
for sample in protocol():
|
||||
e_sample = gs_estimator.extend(sample)
|
||||
estim_prev_b = estimator.estimate(e_sample.X, ext=True)
|
||||
estim_prev_gs = gs_estimator.estimate(e_sample.X, ext=True)
|
||||
erb.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_b)),
|
||||
)
|
||||
ergs.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_gs)),
|
||||
)
|
||||
|
||||
cr = CompReport(
|
||||
[erb, ergs],
|
||||
"test",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
print(cr.table())
|
||||
print(f"[took {time() - tstart:.3f}s]")
|
||||
win11toast.notify("Test", "completed")
|
||||
|
||||
|
||||
def test_mc():
|
||||
d = Dataset(name="rcv1", target="CCAT", prevs=[0.9]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
repeats=100,
|
||||
n_prevalences=21,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
|
||||
ref_er = ref(classifier, d.validation, protocol)
|
||||
mulmc_er = mulmc_sld(classifier, d.validation, protocol)
|
||||
|
||||
cr = CompReport(
|
||||
[mulmc_er, ref_er],
|
||||
name="test_mc",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
with open("test_mc.md", "w") as f:
|
||||
f.write(cr.data().to_markdown())
|
||||
|
||||
|
||||
def test_et():
|
||||
d = Dataset(name="imdb", prevs=[0.5]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
estimator = MCAE(
|
||||
classifier,
|
||||
SLD(LogisticRegression(), exact_train_prev=False),
|
||||
confidence="max_conf",
|
||||
).fit(d.validation)
|
||||
e_test = estimator.extend(d.test)
|
||||
ep = estimator.estimate(e_test.X, ext=True)
|
||||
print(f"{qc.error.acc(ep) = }")
|
||||
print(f"{qc.error.acc(e_test.prevalence()) = }")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_et()
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
import win11toast
|
||||
from quapy.method.aggregative import SLD
|
||||
from quapy.protocol import APP, UPP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quacc as qc
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.error import acc
|
||||
from quacc.evaluation.baseline import ref
|
||||
from quacc.evaluation.method import mulmc_sld
|
||||
from quacc.evaluation.report import CompReport, EvaluationReport
|
||||
from quacc.method.base import MCAE, BinaryQuantifierAccuracyEstimator
|
||||
from quacc.method.model_selection import GridSearchAE
|
||||
|
||||
|
||||
def test_gs():
|
||||
d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw()
|
||||
|
||||
classifier = LogisticRegression()
|
||||
classifier.fit(*d.train.Xy)
|
||||
|
||||
quantifier = SLD(LogisticRegression())
|
||||
# estimator = MultiClassAccuracyEstimator(classifier, quantifier)
|
||||
estimator = BinaryQuantifierAccuracyEstimator(classifier, quantifier)
|
||||
|
||||
v_train, v_val = d.validation.split_stratified(0.6, random_state=0)
|
||||
gs_protocol = UPP(v_val, sample_size=1000, repeats=100)
|
||||
gs_estimator = GridSearchAE(
|
||||
model=deepcopy(estimator),
|
||||
param_grid={
|
||||
"q__classifier__C": np.logspace(-3, 3, 7),
|
||||
"q__classifier__class_weight": [None, "balanced"],
|
||||
"q__recalib": [None, "bcts", "ts"],
|
||||
},
|
||||
refit=False,
|
||||
protocol=gs_protocol,
|
||||
verbose=True,
|
||||
).fit(v_train)
|
||||
|
||||
estimator.fit(d.validation)
|
||||
|
||||
tstart = time()
|
||||
erb, ergs = EvaluationReport("base"), EvaluationReport("gs")
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
n_prevalences=21,
|
||||
repeats=100,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
for sample in protocol():
|
||||
e_sample = gs_estimator.extend(sample)
|
||||
estim_prev_b = estimator.estimate(e_sample.X, ext=True)
|
||||
estim_prev_gs = gs_estimator.estimate(e_sample.X, ext=True)
|
||||
erb.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_b)),
|
||||
)
|
||||
ergs.append_row(
|
||||
sample.prevalence(),
|
||||
acc=abs(acc(e_sample.prevalence()) - acc(estim_prev_gs)),
|
||||
)
|
||||
|
||||
cr = CompReport(
|
||||
[erb, ergs],
|
||||
"test",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
print(cr.table())
|
||||
print(f"[took {time() - tstart:.3f}s]")
|
||||
win11toast.notify("Test", "completed")
|
||||
|
||||
|
||||
def test_mc():
|
||||
d = Dataset(name="rcv1", target="CCAT", prevs=[0.9]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
protocol = APP(
|
||||
d.test,
|
||||
sample_size=1000,
|
||||
repeats=100,
|
||||
n_prevalences=21,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
|
||||
ref_er = ref(classifier, d.validation, protocol)
|
||||
mulmc_er = mulmc_sld(classifier, d.validation, protocol)
|
||||
|
||||
cr = CompReport(
|
||||
[mulmc_er, ref_er],
|
||||
name="test_mc",
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
)
|
||||
|
||||
with open("test_mc.md", "w") as f:
|
||||
f.write(cr.data().to_markdown())
|
||||
|
||||
|
||||
def test_et():
|
||||
d = Dataset(name="imdb", prevs=[0.5]).get()[0]
|
||||
classifier = LogisticRegression().fit(*d.train.Xy)
|
||||
estimator = MCAE(
|
||||
classifier,
|
||||
SLD(LogisticRegression(), exact_train_prev=False),
|
||||
confidence="max_conf",
|
||||
).fit(d.validation)
|
||||
e_test = estimator.extend(d.test)
|
||||
ep = estimator.estimate(e_test.X, ext=True)
|
||||
print(f"{qc.error.acc(ep) = }")
|
||||
print(f"{qc.error.acc(e_test.prevalence()) = }")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_et()
|
||||
|
|
|
@ -1,177 +1,177 @@
|
|||
import math
|
||||
from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import BaseQuantifier
|
||||
from scipy.sparse import csr_matrix
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from quacc.data import ExtendedCollection
|
||||
|
||||
|
||||
class BaseAccuracyEstimator(BaseQuantifier):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence=None,
|
||||
):
|
||||
self.__check_classifier(classifier)
|
||||
self.quantifier = quantifier
|
||||
self.confidence = confidence
|
||||
|
||||
def __check_classifier(self, classifier):
|
||||
if not hasattr(classifier, "predict_proba"):
|
||||
raise ValueError(
|
||||
f"Passed classifier {classifier.__class__.__name__} cannot predict probabilities."
|
||||
)
|
||||
self.classifier = classifier
|
||||
|
||||
def __get_confidence(self):
|
||||
def max_conf(probas):
|
||||
_mc = np.max(probas, axis=-1)
|
||||
_min = 1.0 / probas.shape[1]
|
||||
_norm_mc = (_mc - _min) / (1.0 - _min)
|
||||
return _norm_mc
|
||||
|
||||
def entropy(probas):
|
||||
_ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
|
||||
return _ent
|
||||
|
||||
if self.confidence is None:
|
||||
return None
|
||||
|
||||
__confs = {
|
||||
"max_conf": max_conf,
|
||||
"entropy": entropy,
|
||||
}
|
||||
return __confs.get(self.confidence, None)
|
||||
|
||||
def __get_ext(self, pred_proba):
|
||||
_ext = pred_proba
|
||||
_f_conf = self.__get_confidence()
|
||||
if _f_conf is not None:
|
||||
_confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
|
||||
_ext = np.concatenate((_confs, pred_proba), axis=1)
|
||||
|
||||
return _ext
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(coll.X)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_collection(coll, pred_proba=_ext)
|
||||
|
||||
def _extend_instances(self, instances: np.ndarray | csr_matrix, pred_proba=None):
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(instances)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_instances(instances, _ext)
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
...
|
||||
|
||||
|
||||
class MultiClassAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.e_train = None
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.quantifier.fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.classes_)
|
||||
|
||||
def _check_prevalence_classes(self, estim_prev, estim_classes) -> np.ndarray:
|
||||
true_classes = self.e_train.classes_
|
||||
for _cls in true_classes:
|
||||
if _cls not in estim_classes:
|
||||
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
||||
return estim_prev
|
||||
|
||||
|
||||
class BinaryQuantifierAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.quantifiers = []
|
||||
self.e_trains = []
|
||||
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for train in self.e_trains:
|
||||
quant = deepcopy(self.quantifier)
|
||||
quant.fit(train)
|
||||
self.quantifiers.append(quant)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
# TODO: test
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
_ncl = int(math.sqrt(self.n_classes))
|
||||
s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
|
||||
estim_prevs = self._quantify_helper(s_inst, norms)
|
||||
|
||||
estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
||||
return estim_prev
|
||||
|
||||
def _quantify_helper(
|
||||
self,
|
||||
s_inst: List[np.ndarray | csr_matrix],
|
||||
norms: List[float],
|
||||
):
|
||||
estim_prevs = []
|
||||
for quant, inst, norm in zip(self.quantifiers, s_inst, norms):
|
||||
if inst.shape[0] > 0:
|
||||
estim_prevs.append(quant.quantify(inst) * norm)
|
||||
else:
|
||||
estim_prevs.append(np.asarray([0.0, 0.0]))
|
||||
|
||||
return estim_prevs
|
||||
|
||||
|
||||
BAE = BaseAccuracyEstimator
|
||||
MCAE = MultiClassAccuracyEstimator
|
||||
BQAE = BinaryQuantifierAccuracyEstimator
|
||||
import math
|
||||
from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import BaseQuantifier
|
||||
from scipy.sparse import csr_matrix
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from quacc.data import ExtendedCollection
|
||||
|
||||
|
||||
class BaseAccuracyEstimator(BaseQuantifier):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence=None,
|
||||
):
|
||||
self.__check_classifier(classifier)
|
||||
self.quantifier = quantifier
|
||||
self.confidence = confidence
|
||||
|
||||
def __check_classifier(self, classifier):
|
||||
if not hasattr(classifier, "predict_proba"):
|
||||
raise ValueError(
|
||||
f"Passed classifier {classifier.__class__.__name__} cannot predict probabilities."
|
||||
)
|
||||
self.classifier = classifier
|
||||
|
||||
def __get_confidence(self):
|
||||
def max_conf(probas):
|
||||
_mc = np.max(probas, axis=-1)
|
||||
_min = 1.0 / probas.shape[1]
|
||||
_norm_mc = (_mc - _min) / (1.0 - _min)
|
||||
return _norm_mc
|
||||
|
||||
def entropy(probas):
|
||||
_ent = np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1)
|
||||
return _ent
|
||||
|
||||
if self.confidence is None:
|
||||
return None
|
||||
|
||||
__confs = {
|
||||
"max_conf": max_conf,
|
||||
"entropy": entropy,
|
||||
}
|
||||
return __confs.get(self.confidence, None)
|
||||
|
||||
def __get_ext(self, pred_proba):
|
||||
_ext = pred_proba
|
||||
_f_conf = self.__get_confidence()
|
||||
if _f_conf is not None:
|
||||
_confs = _f_conf(pred_proba).reshape((len(pred_proba), 1))
|
||||
_ext = np.concatenate((_confs, pred_proba), axis=1)
|
||||
|
||||
return _ext
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(coll.X)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_collection(coll, pred_proba=_ext)
|
||||
|
||||
def _extend_instances(self, instances: np.ndarray | csr_matrix, pred_proba=None):
|
||||
if pred_proba is None:
|
||||
pred_proba = self.classifier.predict_proba(instances)
|
||||
|
||||
_ext = self.__get_ext(pred_proba)
|
||||
return ExtendedCollection.extend_instances(instances, _ext)
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
...
|
||||
|
||||
|
||||
class MultiClassAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseQuantifier,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.e_train = None
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.quantifier.fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.classes_)
|
||||
|
||||
def _check_prevalence_classes(self, estim_prev, estim_classes) -> np.ndarray:
|
||||
true_classes = self.e_train.classes_
|
||||
for _cls in true_classes:
|
||||
if _cls not in estim_classes:
|
||||
estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)
|
||||
return estim_prev
|
||||
|
||||
|
||||
class BinaryQuantifierAccuracyEstimator(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
confidence: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
classifier=classifier,
|
||||
quantifier=quantifier,
|
||||
confidence=confidence,
|
||||
)
|
||||
self.quantifiers = []
|
||||
self.e_trains = []
|
||||
|
||||
def fit(self, train: LabelledCollection | ExtendedCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for train in self.e_trains:
|
||||
quant = deepcopy(self.quantifier)
|
||||
quant.fit(train)
|
||||
self.quantifiers.append(quant)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
# TODO: test
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
|
||||
_ncl = int(math.sqrt(self.n_classes))
|
||||
s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
|
||||
estim_prevs = self._quantify_helper(s_inst, norms)
|
||||
|
||||
estim_prev = np.array([prev_row for prev_row in zip(*estim_prevs)]).flatten()
|
||||
return estim_prev
|
||||
|
||||
def _quantify_helper(
|
||||
self,
|
||||
s_inst: List[np.ndarray | csr_matrix],
|
||||
norms: List[float],
|
||||
):
|
||||
estim_prevs = []
|
||||
for quant, inst, norm in zip(self.quantifiers, s_inst, norms):
|
||||
if inst.shape[0] > 0:
|
||||
estim_prevs.append(quant.quantify(inst) * norm)
|
||||
else:
|
||||
estim_prevs.append(np.asarray([0.0, 0.0]))
|
||||
|
||||
return estim_prevs
|
||||
|
||||
|
||||
BAE = BaseAccuracyEstimator
|
||||
MCAE = MultiClassAccuracyEstimator
|
||||
BQAE = BinaryQuantifierAccuracyEstimator
|
||||
|
|
|
@ -1,307 +1,307 @@
|
|||
import itertools
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
from typing import Callable, Union
|
||||
import numpy as np
|
||||
|
||||
import quapy as qp
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
import quacc as qc
|
||||
import quacc.error
|
||||
from quacc.data import ExtendedCollection
|
||||
from quacc.evaluation import evaluate
|
||||
from quacc.logger import SubLogger
|
||||
from quacc.method.base import (
|
||||
BaseAccuracyEstimator,
|
||||
BinaryQuantifierAccuracyEstimator,
|
||||
MultiClassAccuracyEstimator,
|
||||
)
|
||||
|
||||
|
||||
class GridSearchAE(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
model: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
protocol: AbstractProtocol,
|
||||
error: Union[Callable, str] = qc.error.maccd,
|
||||
refit=True,
|
||||
# timeout=-1,
|
||||
# n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.model = model
|
||||
self.param_grid = self.__normalize_params(param_grid)
|
||||
self.protocol = protocol
|
||||
self.refit = refit
|
||||
# self.timeout = timeout
|
||||
# self.n_jobs = qp._get_njobs(n_jobs)
|
||||
self.verbose = verbose
|
||||
self.__check_error(error)
|
||||
assert isinstance(protocol, AbstractProtocol), "unknown protocol"
|
||||
|
||||
def _sout(self, msg):
|
||||
if self.verbose:
|
||||
print(f"[{self.__class__.__name__}]: {msg}")
|
||||
|
||||
def __normalize_params(self, params):
|
||||
__remap = {}
|
||||
for key in params.keys():
|
||||
k, delim, sub_key = key.partition("__")
|
||||
if delim and k == "q":
|
||||
__remap[key] = f"quantifier__{sub_key}"
|
||||
|
||||
return {(__remap[k] if k in __remap else k): v for k, v in params.items()}
|
||||
|
||||
def __check_error(self, error):
|
||||
if error in qc.error.ACCURACY_ERROR:
|
||||
self.error = error
|
||||
elif isinstance(error, str):
|
||||
self.error = qc.error.from_name(error)
|
||||
elif hasattr(error, "__call__"):
|
||||
self.error = error
|
||||
else:
|
||||
raise ValueError(
|
||||
f"unexpected error type; must either be a callable function or a str representing\n"
|
||||
f"the name of an error function in {qc.error.ACCURACY_ERROR_NAMES}"
|
||||
)
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
"""Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
params_keys = list(self.param_grid.keys())
|
||||
params_values = list(self.param_grid.values())
|
||||
|
||||
protocol = self.protocol
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
|
||||
tinit = time()
|
||||
|
||||
hyper = [
|
||||
dict(zip(params_keys, val)) for val in itertools.product(*params_values)
|
||||
]
|
||||
|
||||
# self._sout(f"starting model selection with {self.n_jobs =}")
|
||||
self._sout("starting model selection")
|
||||
|
||||
scores = [self.__params_eval(params, training) for params in hyper]
|
||||
|
||||
for params, score, model in scores:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = "timeout"
|
||||
|
||||
tend = time() - tinit
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError("no combination of hyperparameters seem to work")
|
||||
|
||||
self._sout(
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
log = SubLogger.logger()
|
||||
log.debug(
|
||||
f"[{self.model.__class__.__name__}] "
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
|
||||
if self.refit:
|
||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
||||
self._sout("refitting on the whole development set")
|
||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
||||
else:
|
||||
raise RuntimeWarning(
|
||||
f'"refit" was requested, but the protocol does not '
|
||||
f"implement the {OnLabelledCollectionProtocol.__name__} interface"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def __params_eval(self, params, training):
|
||||
protocol = self.protocol
|
||||
error = self.error
|
||||
|
||||
# if self.timeout > 0:
|
||||
|
||||
# def handler(signum, frame):
|
||||
# raise TimeoutError()
|
||||
|
||||
# signal.signal(signal.SIGALRM, handler)
|
||||
|
||||
tinit = time()
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(self.timeout)
|
||||
|
||||
try:
|
||||
model = deepcopy(self.model)
|
||||
# overrides default parameters with the parameters being explored at this iteration
|
||||
model.set_params(**params)
|
||||
# print({k: v for k, v in model.get_params().items() if k in params})
|
||||
model.fit(training)
|
||||
score = evaluate(model, protocol=protocol, error_metric=error)
|
||||
|
||||
ttime = time() - tinit
|
||||
self._sout(
|
||||
f"hyperparams={params}\t got score {score:.5f} [took {ttime:.4f}s]"
|
||||
)
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(0)
|
||||
# except TimeoutError:
|
||||
# self._sout(f"timeout ({self.timeout}s) reached for config {params}")
|
||||
# score = None
|
||||
except ValueError as e:
|
||||
self._sout(f"the combination of hyperparameters {params} is invalid")
|
||||
raise e
|
||||
except Exception as e:
|
||||
self._sout(f"something went wrong for config {params}; skipping:")
|
||||
self._sout(f"\tException: {e}")
|
||||
score = None
|
||||
|
||||
return params, score, model
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
assert hasattr(self, "best_model_"), "quantify called before fit"
|
||||
return self.best_model().extend(coll, pred_proba=pred_proba)
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||
|
||||
:param instances: sample contanining the instances
|
||||
:return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found
|
||||
by the model selection process.
|
||||
"""
|
||||
|
||||
assert hasattr(self, "best_model_"), "estimate called before fit"
|
||||
return self.best_model().estimate(instances, ext=ext)
|
||||
|
||||
def set_params(self, **parameters):
|
||||
"""Sets the hyper-parameters to explore.
|
||||
|
||||
:param parameters: a dictionary with keys the parameter names and values the list of values to explore
|
||||
"""
|
||||
self.param_grid = parameters
|
||||
|
||||
def get_params(self, deep=True):
|
||||
"""Returns the dictionary of hyper-parameters to explore (`param_grid`)
|
||||
|
||||
:param deep: Unused
|
||||
:return: the dictionary `param_grid`
|
||||
"""
|
||||
return self.param_grid
|
||||
|
||||
def best_model(self):
|
||||
"""
|
||||
Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
|
||||
of hyper-parameters that minimized the error function.
|
||||
|
||||
:return: a trained quantifier
|
||||
"""
|
||||
if hasattr(self, "best_model_"):
|
||||
return self.best_model_
|
||||
raise ValueError("best_model called before fit")
|
||||
|
||||
|
||||
|
||||
class MCAEgsq(MultiClassAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier, quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
|
||||
self.quantifier = GridSearchQ(
|
||||
deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.best_model().classes_)
|
||||
|
||||
|
||||
class BQAEgsq(BinaryQuantifierAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier=classifier, quantifier=quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for e_train in self.e_trains:
|
||||
t_train, t_val = e_train.split_stratified(0.6, random_state=0)
|
||||
quantifier = GridSearchQ(
|
||||
model=deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(t_train)
|
||||
self.quantifiers.append(quantifier)
|
||||
|
||||
return self
|
||||
import itertools
|
||||
from copy import deepcopy
|
||||
from time import time
|
||||
from typing import Callable, Union
|
||||
import numpy as np
|
||||
|
||||
import quapy as qp
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP, AbstractProtocol, OnLabelledCollectionProtocol
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
import quacc as qc
|
||||
import quacc.error
|
||||
from quacc.data import ExtendedCollection
|
||||
from quacc.evaluation import evaluate
|
||||
from quacc.logger import SubLogger
|
||||
from quacc.method.base import (
|
||||
BaseAccuracyEstimator,
|
||||
BinaryQuantifierAccuracyEstimator,
|
||||
MultiClassAccuracyEstimator,
|
||||
)
|
||||
|
||||
|
||||
class GridSearchAE(BaseAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
model: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
protocol: AbstractProtocol,
|
||||
error: Union[Callable, str] = qc.error.maccd,
|
||||
refit=True,
|
||||
# timeout=-1,
|
||||
# n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.model = model
|
||||
self.param_grid = self.__normalize_params(param_grid)
|
||||
self.protocol = protocol
|
||||
self.refit = refit
|
||||
# self.timeout = timeout
|
||||
# self.n_jobs = qp._get_njobs(n_jobs)
|
||||
self.verbose = verbose
|
||||
self.__check_error(error)
|
||||
assert isinstance(protocol, AbstractProtocol), "unknown protocol"
|
||||
|
||||
def _sout(self, msg):
|
||||
if self.verbose:
|
||||
print(f"[{self.__class__.__name__}]: {msg}")
|
||||
|
||||
def __normalize_params(self, params):
|
||||
__remap = {}
|
||||
for key in params.keys():
|
||||
k, delim, sub_key = key.partition("__")
|
||||
if delim and k == "q":
|
||||
__remap[key] = f"quantifier__{sub_key}"
|
||||
|
||||
return {(__remap[k] if k in __remap else k): v for k, v in params.items()}
|
||||
|
||||
def __check_error(self, error):
|
||||
if error in qc.error.ACCURACY_ERROR:
|
||||
self.error = error
|
||||
elif isinstance(error, str):
|
||||
self.error = qc.error.from_name(error)
|
||||
elif hasattr(error, "__call__"):
|
||||
self.error = error
|
||||
else:
|
||||
raise ValueError(
|
||||
f"unexpected error type; must either be a callable function or a str representing\n"
|
||||
f"the name of an error function in {qc.error.ACCURACY_ERROR_NAMES}"
|
||||
)
|
||||
|
||||
def fit(self, training: LabelledCollection):
|
||||
"""Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing
|
||||
the error metric.
|
||||
|
||||
:param training: the training set on which to optimize the hyperparameters
|
||||
:return: self
|
||||
"""
|
||||
params_keys = list(self.param_grid.keys())
|
||||
params_values = list(self.param_grid.values())
|
||||
|
||||
protocol = self.protocol
|
||||
|
||||
self.param_scores_ = {}
|
||||
self.best_score_ = None
|
||||
|
||||
tinit = time()
|
||||
|
||||
hyper = [
|
||||
dict(zip(params_keys, val)) for val in itertools.product(*params_values)
|
||||
]
|
||||
|
||||
# self._sout(f"starting model selection with {self.n_jobs =}")
|
||||
self._sout("starting model selection")
|
||||
|
||||
scores = [self.__params_eval(params, training) for params in hyper]
|
||||
|
||||
for params, score, model in scores:
|
||||
if score is not None:
|
||||
if self.best_score_ is None or score < self.best_score_:
|
||||
self.best_score_ = score
|
||||
self.best_params_ = params
|
||||
self.best_model_ = model
|
||||
self.param_scores_[str(params)] = score
|
||||
else:
|
||||
self.param_scores_[str(params)] = "timeout"
|
||||
|
||||
tend = time() - tinit
|
||||
|
||||
if self.best_score_ is None:
|
||||
raise TimeoutError("no combination of hyperparameters seem to work")
|
||||
|
||||
self._sout(
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
log = SubLogger.logger()
|
||||
log.debug(
|
||||
f"[{self.model.__class__.__name__}] "
|
||||
f"optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) "
|
||||
f"[took {tend:.4f}s]"
|
||||
)
|
||||
|
||||
if self.refit:
|
||||
if isinstance(protocol, OnLabelledCollectionProtocol):
|
||||
self._sout("refitting on the whole development set")
|
||||
self.best_model_.fit(training + protocol.get_labelled_collection())
|
||||
else:
|
||||
raise RuntimeWarning(
|
||||
f'"refit" was requested, but the protocol does not '
|
||||
f"implement the {OnLabelledCollectionProtocol.__name__} interface"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def __params_eval(self, params, training):
|
||||
protocol = self.protocol
|
||||
error = self.error
|
||||
|
||||
# if self.timeout > 0:
|
||||
|
||||
# def handler(signum, frame):
|
||||
# raise TimeoutError()
|
||||
|
||||
# signal.signal(signal.SIGALRM, handler)
|
||||
|
||||
tinit = time()
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(self.timeout)
|
||||
|
||||
try:
|
||||
model = deepcopy(self.model)
|
||||
# overrides default parameters with the parameters being explored at this iteration
|
||||
model.set_params(**params)
|
||||
# print({k: v for k, v in model.get_params().items() if k in params})
|
||||
model.fit(training)
|
||||
score = evaluate(model, protocol=protocol, error_metric=error)
|
||||
|
||||
ttime = time() - tinit
|
||||
self._sout(
|
||||
f"hyperparams={params}\t got score {score:.5f} [took {ttime:.4f}s]"
|
||||
)
|
||||
|
||||
# if self.timeout > 0:
|
||||
# signal.alarm(0)
|
||||
# except TimeoutError:
|
||||
# self._sout(f"timeout ({self.timeout}s) reached for config {params}")
|
||||
# score = None
|
||||
except ValueError as e:
|
||||
self._sout(f"the combination of hyperparameters {params} is invalid")
|
||||
raise e
|
||||
except Exception as e:
|
||||
self._sout(f"something went wrong for config {params}; skipping:")
|
||||
self._sout(f"\tException: {e}")
|
||||
score = None
|
||||
|
||||
return params, score, model
|
||||
|
||||
def extend(self, coll: LabelledCollection, pred_proba=None) -> ExtendedCollection:
|
||||
assert hasattr(self, "best_model_"), "quantify called before fit"
|
||||
return self.best_model().extend(coll, pred_proba=pred_proba)
|
||||
|
||||
def estimate(self, instances, ext=False):
|
||||
"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
|
||||
|
||||
:param instances: sample contanining the instances
|
||||
:return: a ndarray of shape `(n_classes)` with class prevalence estimates as according to the best model found
|
||||
by the model selection process.
|
||||
"""
|
||||
|
||||
assert hasattr(self, "best_model_"), "estimate called before fit"
|
||||
return self.best_model().estimate(instances, ext=ext)
|
||||
|
||||
def set_params(self, **parameters):
|
||||
"""Sets the hyper-parameters to explore.
|
||||
|
||||
:param parameters: a dictionary with keys the parameter names and values the list of values to explore
|
||||
"""
|
||||
self.param_grid = parameters
|
||||
|
||||
def get_params(self, deep=True):
|
||||
"""Returns the dictionary of hyper-parameters to explore (`param_grid`)
|
||||
|
||||
:param deep: Unused
|
||||
:return: the dictionary `param_grid`
|
||||
"""
|
||||
return self.param_grid
|
||||
|
||||
def best_model(self):
|
||||
"""
|
||||
Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination
|
||||
of hyper-parameters that minimized the error function.
|
||||
|
||||
:return: a trained quantifier
|
||||
"""
|
||||
if hasattr(self, "best_model_"):
|
||||
return self.best_model_
|
||||
raise ValueError("best_model called before fit")
|
||||
|
||||
|
||||
|
||||
class MCAEgsq(MultiClassAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier, quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
|
||||
self.quantifier = GridSearchQ(
|
||||
deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(self.e_train)
|
||||
|
||||
return self
|
||||
|
||||
def estimate(self, instances, ext=False) -> np.ndarray:
|
||||
e_inst = instances if ext else self._extend_instances(instances)
|
||||
estim_prev = self.quantifier.quantify(e_inst)
|
||||
return self._check_prevalence_classes(estim_prev, self.quantifier.best_model().classes_)
|
||||
|
||||
|
||||
class BQAEgsq(BinaryQuantifierAccuracyEstimator):
|
||||
def __init__(
|
||||
self,
|
||||
classifier: BaseEstimator,
|
||||
quantifier: BaseAccuracyEstimator,
|
||||
param_grid: dict,
|
||||
error: Union[Callable, str] = qp.error.mae,
|
||||
refit=True,
|
||||
timeout=-1,
|
||||
n_jobs=None,
|
||||
verbose=False,
|
||||
):
|
||||
self.param_grid = param_grid
|
||||
self.refit = refit
|
||||
self.timeout = timeout
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
self.error = error
|
||||
super().__init__(classifier=classifier, quantifier=quantifier)
|
||||
|
||||
def fit(self, train: LabelledCollection):
|
||||
self.e_train = self.extend(train)
|
||||
|
||||
self.n_classes = self.e_train.n_classes
|
||||
self.e_trains = self.e_train.split_by_pred()
|
||||
|
||||
self.quantifiers = []
|
||||
for e_train in self.e_trains:
|
||||
t_train, t_val = e_train.split_stratified(0.6, random_state=0)
|
||||
quantifier = GridSearchQ(
|
||||
model=deepcopy(self.quantifier),
|
||||
param_grid=self.param_grid,
|
||||
protocol=UPP(t_val, repeats=100),
|
||||
error=self.error,
|
||||
refit=self.refit,
|
||||
timeout=self.timeout,
|
||||
n_jobs=self.n_jobs,
|
||||
verbose=self.verbose,
|
||||
).fit(t_train)
|
||||
self.quantifiers.append(quantifier)
|
||||
|
||||
return self
|
||||
|
|
478
quacc/plot.py
478
quacc/plot.py
|
@ -1,239 +1,239 @@
|
|||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from cycler import cycler
|
||||
|
||||
from quacc.environment import env
|
||||
|
||||
matplotlib.use("agg")
|
||||
|
||||
|
||||
def _get_markers(n: int):
|
||||
ls = "ovx+sDph*^1234X><.Pd"
|
||||
if n > len(ls):
|
||||
ls = ls * (n / len(ls) + 1)
|
||||
return list(ls)[:n]
|
||||
|
||||
|
||||
def plot_delta(
|
||||
base_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
stdevs=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
avg=None,
|
||||
) -> Path:
|
||||
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
base_prevs = base_prevs[:, pos_class]
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
base_prevs,
|
||||
deltas,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if stdevs is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
stdev = stdevs[_col_idx].flatten()
|
||||
nn_idx = np.intersect1d(
|
||||
np.where(deltas != np.nan)[0],
|
||||
np.where(stdev != np.nan)[0],
|
||||
)
|
||||
_bps, _ds, _st = base_prevs[nn_idx], deltas[nn_idx], stdev[nn_idx]
|
||||
ax.fill_between(
|
||||
_bps,
|
||||
_ds - _st,
|
||||
_ds + _st,
|
||||
color=_cy["color"],
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
x_label = "test" if avg is None or avg == "train" else "train"
|
||||
ax.set(
|
||||
xlabel=f"{x_label} prevalence",
|
||||
ylabel=metric,
|
||||
title=title,
|
||||
)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_diagonal(
|
||||
reference,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
):
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"diagonal_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"diagonal_{name}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
ax.set_aspect("equal")
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(
|
||||
color=[cm(i) for i in range(NUM_COLORS)],
|
||||
marker=_get_markers(NUM_COLORS),
|
||||
)
|
||||
|
||||
reference = np.array(reference)
|
||||
x_ticks = np.unique(reference)
|
||||
x_ticks.sort()
|
||||
|
||||
for deltas, _cy in zip(data, cy):
|
||||
ax.plot(
|
||||
reference,
|
||||
deltas,
|
||||
color=_cy["color"],
|
||||
linestyle="None",
|
||||
marker=_cy["marker"],
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
# ensure limits are equal for both axes
|
||||
_alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
|
||||
_lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
|
||||
ax.set(xlim=tuple(_lims), ylim=tuple(_lims))
|
||||
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
slope, interc = np.polyfit(reference, deltas, 1)
|
||||
y_lr = np.array([slope * x + interc for x in _lims])
|
||||
ax.plot(
|
||||
_lims,
|
||||
y_lr,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
markersize="0",
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
# plot reference line
|
||||
ax.plot(
|
||||
_lims,
|
||||
_lims,
|
||||
color="black",
|
||||
linestyle="--",
|
||||
markersize=0,
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_shift(
|
||||
shift_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
counts=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
) -> Path:
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"shift_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"shift_{name}_avg_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
shift_prevs = shift_prevs[:, pos_class]
|
||||
for method, shifts, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
shift_prevs,
|
||||
shifts,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if counts is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
count = counts[_col_idx].flatten()
|
||||
for prev, shift, cnt in zip(shift_prevs, shifts, count):
|
||||
label = f"{cnt}"
|
||||
plt.annotate(
|
||||
label,
|
||||
(prev, shift),
|
||||
textcoords="offset points",
|
||||
xytext=(0, 10),
|
||||
ha="center",
|
||||
color=_cy["color"],
|
||||
fontsize=12.0,
|
||||
)
|
||||
|
||||
ax.set(xlabel="dataset shift", ylabel=metric, title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from cycler import cycler
|
||||
|
||||
from quacc.environment import env
|
||||
|
||||
matplotlib.use("agg")
|
||||
|
||||
|
||||
def _get_markers(n: int):
|
||||
ls = "ovx+sDph*^1234X><.Pd"
|
||||
if n > len(ls):
|
||||
ls = ls * (n / len(ls) + 1)
|
||||
return list(ls)[:n]
|
||||
|
||||
|
||||
def plot_delta(
|
||||
base_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
stdevs=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
avg=None,
|
||||
) -> Path:
|
||||
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
base_prevs = base_prevs[:, pos_class]
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
base_prevs,
|
||||
deltas,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if stdevs is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
stdev = stdevs[_col_idx].flatten()
|
||||
nn_idx = np.intersect1d(
|
||||
np.where(deltas != np.nan)[0],
|
||||
np.where(stdev != np.nan)[0],
|
||||
)
|
||||
_bps, _ds, _st = base_prevs[nn_idx], deltas[nn_idx], stdev[nn_idx]
|
||||
ax.fill_between(
|
||||
_bps,
|
||||
_ds - _st,
|
||||
_ds + _st,
|
||||
color=_cy["color"],
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
x_label = "test" if avg is None or avg == "train" else "train"
|
||||
ax.set(
|
||||
xlabel=f"{x_label} prevalence",
|
||||
ylabel=metric,
|
||||
title=title,
|
||||
)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_diagonal(
|
||||
reference,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
):
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"diagonal_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"diagonal_{name}_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
ax.set_aspect("equal")
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(
|
||||
color=[cm(i) for i in range(NUM_COLORS)],
|
||||
marker=_get_markers(NUM_COLORS),
|
||||
)
|
||||
|
||||
reference = np.array(reference)
|
||||
x_ticks = np.unique(reference)
|
||||
x_ticks.sort()
|
||||
|
||||
for deltas, _cy in zip(data, cy):
|
||||
ax.plot(
|
||||
reference,
|
||||
deltas,
|
||||
color=_cy["color"],
|
||||
linestyle="None",
|
||||
marker=_cy["marker"],
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
alpha=0.25,
|
||||
)
|
||||
|
||||
# ensure limits are equal for both axes
|
||||
_alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
|
||||
_lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
|
||||
ax.set(xlim=tuple(_lims), ylim=tuple(_lims))
|
||||
|
||||
for method, deltas, _cy in zip(columns, data, cy):
|
||||
slope, interc = np.polyfit(reference, deltas, 1)
|
||||
y_lr = np.array([slope * x + interc for x in _lims])
|
||||
ax.plot(
|
||||
_lims,
|
||||
y_lr,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
markersize="0",
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
# plot reference line
|
||||
ax.plot(
|
||||
_lims,
|
||||
_lims,
|
||||
color="black",
|
||||
linestyle="--",
|
||||
markersize=0,
|
||||
zorder=1,
|
||||
)
|
||||
|
||||
ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
return output_path
|
||||
|
||||
|
||||
def plot_shift(
|
||||
shift_prevs,
|
||||
columns,
|
||||
data,
|
||||
*,
|
||||
counts=None,
|
||||
pos_class=1,
|
||||
metric="acc",
|
||||
name="default",
|
||||
train_prev=None,
|
||||
legend=True,
|
||||
) -> Path:
|
||||
if train_prev is not None:
|
||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||
title = f"shift_{name}_{t_prev_pos}_{metric}"
|
||||
else:
|
||||
title = f"shift_{name}_avg_{metric}"
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
ax.set_aspect("auto")
|
||||
ax.grid()
|
||||
|
||||
NUM_COLORS = len(data)
|
||||
cm = plt.get_cmap("tab10")
|
||||
if NUM_COLORS > 10:
|
||||
cm = plt.get_cmap("tab20")
|
||||
cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
|
||||
|
||||
shift_prevs = shift_prevs[:, pos_class]
|
||||
for method, shifts, _cy in zip(columns, data, cy):
|
||||
ax.plot(
|
||||
shift_prevs,
|
||||
shifts,
|
||||
label=method,
|
||||
color=_cy["color"],
|
||||
linestyle="-",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
zorder=2,
|
||||
)
|
||||
if counts is not None:
|
||||
_col_idx = np.where(columns == method)[0]
|
||||
count = counts[_col_idx].flatten()
|
||||
for prev, shift, cnt in zip(shift_prevs, shifts, count):
|
||||
label = f"{cnt}"
|
||||
plt.annotate(
|
||||
label,
|
||||
(prev, shift),
|
||||
textcoords="offset points",
|
||||
xytext=(0, 10),
|
||||
ha="center",
|
||||
color=_cy["color"],
|
||||
fontsize=12.0,
|
||||
)
|
||||
|
||||
ax.set(xlabel="dataset shift", ylabel=metric, title=title)
|
||||
|
||||
if legend:
|
||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||
output_path = env.PLOT_OUT_DIR / f"{title}.png"
|
||||
fig.savefig(output_path, bbox_inches="tight")
|
||||
|
||||
return output_path
|
||||
|
|
118
quacc/utils.py
118
quacc/utils.py
|
@ -1,59 +1,59 @@
|
|||
import functools
import os
import shutil
from pathlib import Path

import pandas as pd

from quacc.environment import env


def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
    if len(dfs) < 1:
        raise ValueError
    if len(dfs) == 1:
        return dfs[0]
    df = dfs[0]
    # join every remaining dataframe on the shared index columns
    for ndf in dfs[1:]:
        df = df.join(ndf.set_index(df_index), on=df_index)

    return df


def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
    def _reduce_func(s1, s2):
        return {(n1, n2): v + s2[(n1, n2)] for ((n1, n2), v) in s1.items()}

    # sum the report rows (excluding the first and last), then average them,
    # dropping the "base" columns from the result
    lst = df.to_dict(orient="records")[1:-1]
    summed_series = functools.reduce(_reduce_func, lst)
    idx = df.columns.drop([("base", "T"), ("base", "F")])
    avg_report = {
        (n1, n2): (v / len(lst))
        for ((n1, n2), v) in summed_series.items()
        if n1 != "base"
    }
    return pd.DataFrame([avg_report], columns=idx)


def fmt_line_md(s):
    return f"> {s} \n"


def create_dataser_dir(dir_name, update=False):
    base_out_dir = Path(env.OUT_DIR_NAME)
    if not base_out_dir.exists():
        os.mkdir(base_out_dir)

    dataset_dir = base_out_dir / dir_name
    env.OUT_DIR = dataset_dir
    if update:
        # keep existing outputs; only create the directory if missing
        if not dataset_dir.exists():
            os.mkdir(dataset_dir)
    else:
        # start from a clean directory
        shutil.rmtree(dataset_dir, ignore_errors=True)
        os.mkdir(dataset_dir)

    plot_dir_path = dataset_dir / "plot"
    env.PLOT_OUT_DIR = plot_dir_path
    if not plot_dir_path.exists():
        os.mkdir(plot_dir_path)
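A quick usage sketch of combine_dataframes (the column and dataset names are hypothetical): each dataframe after the first is joined on the shared index columns, so the result keeps one row per key with all metric columns side by side.

import pandas as pd

from quacc.utils import combine_dataframes

df_a = pd.DataFrame({"dataset": ["imdb", "spambase"], "acc_kfcv": [0.81, 0.90]})
df_b = pd.DataFrame({"dataset": ["imdb", "spambase"], "acc_bqae": [0.84, 0.92]})

merged = combine_dataframes([df_a, df_b], df_index=["dataset"])
print(merged)  # one row per dataset, with both accuracy columns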
80 roadmap.md
@@ -1,40 +1,40 @@
## Roadmap

#### quantifier domain

- single multilabel quantifier
- vector of binary quantifiers

|    quantifier    |                |                |
|:----------------:|:--------------:|:--------------:|
| true quantifier  | true positive  | false positive |
| false quantifier | false negative | true negative  |

#### dataset split

- train | test
  - classifier C is fit on train
  - quantifier Q is fit on the cross-validated outputs of C over train (sketched below)
- train | validation | test
  - classifier C is fit on train
  - quantifier Q is fit on validation

#### classifier origin

- black box
- crystal box

#### test metrics

- f1_score
- K

#### models

- classifier
- quantifier
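To make the first split protocol concrete, a minimal sketch in plain scikit-learn; the prevalence-by-mean "quantifier" is a toy stand-in for illustration, not one of the project's estimators.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# classifier C is fit on train
C = LogisticRegression().fit(X_train, y_train)

# out-of-fold posteriors of C over train: the data a quantifier Q is fit on,
# so that Q never sees overfit in-sample predictions
oof_posteriors = cross_val_predict(
    LogisticRegression(), X_train, y_train, cv=5, method="predict_proba"
)

# toy stand-in for Q (a real quantifier would be fit on oof_posteriors)
def quantify(posteriors: np.ndarray) -> np.ndarray:
    return posteriors.mean(axis=0)  # estimate prevalence as the mean posterior

print(quantify(C.predict_proba(X_test)))  # estimated test prevalence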
4202 test_mc.md
File diff suppressed because it is too large
@@ -1,225 +1,225 @@
import numpy as np
import pytest
import scipy.sparse as sp

from quacc.data import ExClassManager as ECM, ExtendedCollection


class TestExClassManager:
    @pytest.mark.parametrize(
        "true_class,pred_class,result",
        [
            (0, 0, 0),
            (0, 1, 1),
            (1, 0, 2),
            (1, 1, 3),
        ],
    )
    def test_get_ex(self, true_class, pred_class, result):
        ncl = 2
        assert ECM.get_ex(ncl, true_class, pred_class) == result

    @pytest.mark.parametrize(
        "ex_class,result",
        [
            (0, 0),
            (1, 1),
            (2, 0),
            (3, 1),
        ],
    )
    def test_get_pred(self, ex_class, result):
        ncl = 2
        assert ECM.get_pred(ncl, ex_class) == result

    @pytest.mark.parametrize(
        "ex_class,result",
        [
            (0, 0),
            (1, 0),
            (2, 1),
            (3, 1),
        ],
    )
    def test_get_true(self, ex_class, result):
        ncl = 2
        assert ECM.get_true(ncl, ex_class) == result


class TestExtendedCollection:
    @pytest.mark.parametrize(
        "instances,result",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [np.asarray([1, 3]), np.asarray([0, 2])],
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [np.asarray([1, 3]), np.asarray([0, 2])],
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [np.asarray([], dtype=int), np.asarray([0, 1])],
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [np.asarray([], dtype=int), np.asarray([0, 1])],
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [np.asarray([0, 1]), np.asarray([], dtype=int)],
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [np.asarray([0, 1]), np.asarray([], dtype=int)],
            ),
        ],
    )
    def test__split_index_by_pred(self, instances, result):
        ncl = 2
        assert all(
            np.array_equal(a, b)
            for (a, b) in zip(
                ExtendedCollection._split_index_by_pred(ncl, instances),
                result,
            )
        )

    @pytest.mark.parametrize(
        "instances,s_inst,norms",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [
                    np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.5, 0.5],
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                [
                    sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.5, 0.5],
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [
                    np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    np.asarray([], dtype=int),
                ],
                [1.0, 0.0],
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                [
                    sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                    sp.csr_matrix([], dtype=int),
                ],
                [1.0, 0.0],
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [
                    np.asarray([], dtype=int),
                    np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.0, 1.0],
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                [
                    sp.csr_matrix([], dtype=int),
                    sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                ],
                [0.0, 1.0],
            ),
        ],
    )
    def test_split_inst_by_pred(self, instances, s_inst, norms):
        ncl = 2
        _s_inst, _norms = ExtendedCollection.split_inst_by_pred(ncl, instances)
        if isinstance(s_inst, np.ndarray):
            assert all(np.array_equal(a, b) for (a, b) in zip(_s_inst, s_inst))
        if isinstance(s_inst, sp.csr_matrix):
            assert all((a != b).nnz == 0 for (a, b) in zip(_s_inst, s_inst))
        assert all(a == b for (a, b) in zip(_norms, norms))

    @pytest.mark.parametrize(
        "instances,labels,inst0,lbl0,inst1,lbl1",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([3, 0, 1, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([3, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([1, 0]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                np.asarray([], dtype=int),
                np.asarray([], dtype=int),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 2]),
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0, 1]),
                sp.csr_matrix(np.empty((0, 0), dtype=int)),
                np.asarray([], dtype=int),
            ),
        ],
    )
    def test_split_by_pred(self, instances, labels, inst0, lbl0, inst1, lbl1):
        ec = ExtendedCollection(instances, labels, classes=range(0, 4))
        [ec0, ec1] = ec.split_by_pred()
        if isinstance(instances, np.ndarray):
            assert np.array_equal(ec0.X, inst0)
            assert np.array_equal(ec1.X, inst1)
        if isinstance(instances, sp.csr_matrix):
            assert (ec0.X != inst0).nnz == 0
            assert (ec1.X != inst1).nnz == 0
        assert np.array_equal(ec0.y, lbl0)
        assert np.array_equal(ec1.y, lbl1)
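The three parametrize tables above fully determine the extended-class encoding. As a reading aid, this is the arithmetic they imply (a sketch, not quacc's actual implementation):

ncl = 2  # binary case

def get_ex(ncl, true_class, pred_class):
    # extended class = true_class * ncl + pred_class
    return true_class * ncl + pred_class

def get_pred(ncl, ex_class):
    return ex_class % ncl

def get_true(ncl, ex_class):
    return ex_class // ncl

assert [get_ex(2, t, p) for t in (0, 1) for p in (0, 1)] == [0, 1, 2, 3]
assert [get_pred(2, e) for e in range(4)] == [0, 1, 0, 1]
assert [get_true(2, e) for e in range(4)] == [0, 0, 1, 1]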
@@ -1,3 +1,3 @@

class TestDataset:
    pass
@@ -1,12 +1,12 @@
from sklearn.linear_model import LogisticRegression

from quacc.dataset import Dataset
from quacc.evaluation.baseline import kfcv


class TestBaseline:
    def test_kfcv(self):
        spambase = Dataset("spambase", n_prevalences=1).get_raw()
        c_model = LogisticRegression()
        c_model.fit(spambase.train.X, spambase.train.y)
        assert "f1_score" in kfcv(c_model, spambase.validation)
@@ -1,66 +1,66 @@
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.linear_model import LogisticRegression

from quacc.method.base import BinaryQuantifierAccuracyEstimator


class TestBQAE:
    @pytest.mark.parametrize(
        "instances,preds0,preds1,result",
        [
            (
                np.asarray(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                sp.csr_matrix(
                    [[0, 0.3, 0.7], [1, 0.54, 0.46], [2, 0.28, 0.72], [3, 0.6, 0.4]]
                ),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.15, 0.2, 0.35, 0.3]),
            ),
            (
                np.asarray([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                sp.csr_matrix([[0, 0.3, 0.7], [2, 0.28, 0.72]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.0, 0.4, 0.0, 0.6]),
            ),
            (
                np.asarray([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
            (
                sp.csr_matrix([[1, 0.54, 0.46], [3, 0.6, 0.4]]),
                np.asarray([0.3, 0.7]),
                np.asarray([0.4, 0.6]),
                np.asarray([0.3, 0.0, 0.7, 0.0]),
            ),
        ],
    )
    def test_estimate_ndarray(self, mocker, instances, preds0, preds1, result):
        estimator = BinaryQuantifierAccuracyEstimator(LogisticRegression())
        estimator.n_classes = 4
        # stub out the two binary quantifiers; pytest-mock undoes the
        # patches at test teardown, so no context manager is needed
        mocker.patch.object(estimator.q_model_0, "quantify", return_value=preds0)
        mocker.patch.object(estimator.q_model_1, "quantify", return_value=preds1)
        assert np.array_equal(
            estimator.estimate(instances, ext=True),
            result,
        )
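The expected vectors in these cases follow a simple recombination rule: each binary quantifier's estimate is scaled by the fraction of instances routed to its prediction slice and interleaved into the four extended classes. A sketch of that arithmetic (a hypothetical helper, not the estimator's actual code):

import numpy as np

def recombine(preds0, preds1, norm0, norm1):
    # preds0/preds1: prevalence estimates of the two binary quantifiers
    # norm0/norm1: fraction of instances predicted in each slice
    return np.array([
        preds0[0] * norm0,  # ex class 0
        preds1[0] * norm1,  # ex class 1
        preds0[1] * norm0,  # ex class 2
        preds1[1] * norm1,  # ex class 3
    ])

# first test case: both slices hold half of the instances
print(recombine([0.3, 0.7], [0.4, 0.6], 0.5, 0.5))  # [0.15 0.2  0.35 0.3 ]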
@@ -1,2 +1,2 @@
class TestMCAE:
    pass