diag plot fixed, opts, avg plot, best score added

parent 232a670305
commit 3345514c99
@@ -11,4 +11,5 @@ lipton_bbse/__pycache__/*
 elsahar19_rca/__pycache__/*
 *.coverage
 .coverage
 scp_sync.py
+out/*

67 TODO.html
@@ -41,12 +41,67 @@
 </head>
 <body class="vscode-body vscode-light">
 <ul class="contains-task-list">
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked="" type="checkbox"> add table averages</li>
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked="" type="checkbox"> plot; 3 types (notes + email + garg)</li>
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" type="checkbox"> fix kfcv baseline</li>
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked="" type="checkbox"> add method with CC besides SLD</li>
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked="" type="checkbox"> take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)</li>
-<li class="task-list-item enabled"><input class="task-list-item-checkbox" checked="" type="checkbox"> vary the recalibration parameter in SLD</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> add table averages</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> plot; 3 types (notes + email + garg)</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix kfcv baseline</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> add method with CC besides SLD</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> vary the recalibration parameter in SLD</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> fix diagonal plot</p>
+<ul>
+<li>seaborn example gallery</li>
+</ul>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> recalib variants: bcts, SLD (try exact_train_prev=False)</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> check what validation size garg uses</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> for model selection, test the classifier's C parameter, explored over np.logspace(-3,3,7) or np.logspace(-4,4,9), and the class_weight parameter, explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as the error and UPP as the protocol</p>
+<ul>
+<li>qp.train_test_split to get v_train and v_val</li>
+<li>GridSearchQ(
+model: BaseQuantifier,
+param_grid: {
+'classifier__C': np.logspace(-3,3,7),
+'classifier__class_weight': [None, 'balanced'],
+'recalib': [None, 'bcts']
+},
+protocol: UPP(V_val, repeats=1000),
+error = qp.error.mae,
+refit=True,
+timeout=-1,
+n_jobs=-2,
+verbose=True).fit(V_tr)</li>
+</ul>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> aggregate plot, with the shift on the x axis, considering all training sets and averaging over the 9 cases (each line is a method), non-optimized and optimized results</p>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> save the best score obtained by each application of GridSearchQ</p>
+<ul>
+<li>in the bin case, average the two best scores</li>
+</ul>
+</li>
+<li class="task-list-item enabled">
+<p><input class="task-list-item-checkbox" checked="" type="checkbox"> import baselines</p>
+</li>
 </ul>
21 TODO.md
@@ -1,16 +1,17 @@
 - [x] add table averages
 - [x] plot; 3 types (notes + email + garg)
-- [ ] fix kfcv baseline
+- [x] fix kfcv baseline
 - [x] add method with CC besides SLD
 - [x] take the most populous class of rcv1, remove negatives until reaching 50/50; then subsample with 9 training prevalences (from 0.1-0.9 to 0.9-0.1)
 - [x] vary the recalibration parameter in SLD

-- [ ] aggregate plot, with the shift on the x axis, considering all training sets and averaging over the 9 cases (each line is a method), non-optimized and optimized results
-- [ ] recalib variants: bcts, SLD (try exact_train_prev=False)
-- [ ] check what validation size garg uses
-- [ ] for model selection, test the classifier's C parameter, explored over np.logspace(-3,3,7) or np.logspace(-4,4,9), and the class_weight parameter, explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as the error and UPP as the protocol
-  - qp.train_test_split to get val_train and val_val
+- [x] fix diagonal plot
+  - seaborn example gallery
+- [x] recalib variants: bcts, SLD (try exact_train_prev=False)
+- [x] check what validation size garg uses
+- [x] for model selection, test the classifier's C parameter, explored over np.logspace(-3,3,7) or np.logspace(-4,4,9), and the class_weight parameter, explored over None or "balanced"; use qp.model_selection.GridSearchQ with mae as the error and UPP as the protocol
+  - qp.train_test_split to get v_train and v_val
   - GridSearchQ(
       model: BaseQuantifier,
       param_grid: {
@@ -24,7 +25,7 @@
       timeout=-1,
       n_jobs=-2,
       verbose=True).fit(V_tr)
-- save the best score obtained by each application of GridSearchQ
-  - in the bin case, average the two best scores
-- seaborn example gallery
+- [x] aggregate plot, with the shift on the x axis, considering all training sets and averaging over the 9 cases (each line is a method), non-optimized and optimized results
+- [x] save the best score obtained by each application of GridSearchQ
+  - in the bin case, average the two best scores
+- [x] import baselines
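The model-selection item above spells out the whole grid. A minimal runnable sketch of that procedure, assuming quapy's GridSearchQ, UPP and SLD APIs as named in the list (the dataset and the V_tr/V_val split are placeholders):

    # Sketch of the model selection described in the TODO; data loading is a placeholder.
    import numpy as np
    import quapy as qp
    from quapy.method.aggregative import SLD
    from quapy.model_selection import GridSearchQ
    from quapy.protocol import UPP
    from sklearn.linear_model import LogisticRegression

    qp.environ["SAMPLE_SIZE"] = 1000
    train = qp.datasets.fetch_reviews("imdb", tfidf=True).training  # placeholder dataset
    V_tr, V_val = train.split_stratified(train_prop=0.6, random_state=0)

    gs = GridSearchQ(
        SLD(LogisticRegression()),
        param_grid={
            "classifier__C": np.logspace(-3, 3, 7),
            "classifier__class_weight": [None, "balanced"],
            "recalib": [None, "bcts"],
        },
        protocol=UPP(V_val, repeats=1000),
        error=qp.error.mae,
        refit=True,
        n_jobs=-2,
        verbose=True,
    ).fit(V_tr)
    print(gs.best_params_, gs.best_score_)  # best_score_ is the value saved per the item above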
149 conf.yaml
@@ -1,71 +1,102 @@
+debug_conf: &debug_conf
+global:
+METRICS:
+- acc
+DATASET_N_PREVS: 1

-exec: []
+datasets:
+- DATASET_NAME: rcv1
+DATASET_TARGET: CCAT

-commons:
-- DATASET_NAME: rcv1
-DATASET_TARGET: CCAT
+plot_confs:
+debug:
+PLOT_ESTIMATORS:
+# - mul_sld_bcts
+- mul_sld
+- ref
+- atc_mc
+- atc_ne
+
+test_conf: &test_conf
+global:
 METRICS:
 - acc
 - f1
-DATASET_N_PREVS: 9
-- DATASET_NAME: imdb
+DATASET_N_PREVS: 3
+
+datasets:
+- DATASET_NAME: rcv1
+DATASET_TARGET: CCAT
+
+plot_confs:
+best_vs_atc:
+PLOT_ESTIMATORS:
+- bin_sld
+- bin_sld_bcts
+- bin_sld_gs
+- mul_sld
+- mul_sld_bcts
+- mul_sld_gs
+- ref
+- atc_mc
+- atc_ne
+
+main_conf: &main_conf
+global:
 METRICS:
 - acc
 - f1
 DATASET_N_PREVS: 9

-confs:
+datasets:
+- DATASET_NAME: rcv1
+DATASET_TARGET: CCAT
+datasets_bck:
+- DATASET_NAME: rcv1
+DATASET_TARGET: GCAT
+- DATASET_NAME: rcv1
+DATASET_TARGET: MCAT
+- DATASET_NAME: imdb

-all_mul_vs_atc:
-COMP_ESTIMATORS:
-- our_mul_SLD
-- our_mul_SLD_nbvs
-- our_mul_SLD_bcts
-- our_mul_SLD_ts
-- our_mul_SLD_vs
-- our_mul_CC
-- ref
-- atc_mc
-- atc_ne
+plot_confs:
+gs_vs_atc:
+PLOT_ESTIMATORS:
+- mul_sld_gs
+- bin_sld_gs
+- ref
+- atc_mc
+- atc_ne
+PLOT_STDEV: true
+best_vs_atc:
+PLOT_ESTIMATORS:
+- mul_sld_bcts
+- mul_sld_gs
+- bin_sld_bcts
+- bin_sld_gs
+- ref
+- atc_mc
+- atc_ne
+all_vs_atc:
+PLOT_ESTIMATORS:
+- bin_sld
+- bin_sld_bcts
+- bin_sld_gs
+- mul_sld
+- mul_sld_bcts
+- mul_sld_gs
+- ref
+- atc_mc
+- atc_ne
+best_vs_all:
+PLOT_ESTIMATORS:
+- bin_sld_bcts
+- bin_sld_gs
+- mul_sld_bcts
+- mul_sld_gs
+- ref
+- kfcv
+- atc_mc
+- atc_ne
+- doc_feat

-all_bin_vs_atc:
-COMP_ESTIMATORS:
-- our_bin_SLD
-- our_bin_SLD_nbvs
-- our_bin_SLD_bcts
-- our_bin_SLD_ts
-- our_bin_SLD_vs
-- our_bin_CC
-- ref
-- atc_mc
-- atc_ne
-
-best_our_vs_atc:
-COMP_ESTIMATORS:
-- our_bin_SLD
-- our_bin_SLD_bcts
-- our_bin_SLD_vs
-- our_bin_CC
-- our_mul_SLD
-- our_mul_SLD_bcts
-- our_mul_SLD_vs
-- our_mul_CC
-- ref
-- atc_mc
-- atc_ne
-
-best_our_vs_all:
-COMP_ESTIMATORS:
-- our_bin_SLD
-- our_bin_SLD_bcts
-- our_bin_SLD_vs
-- our_bin_CC
-- our_mul_SLD
-- our_mul_SLD_bcts
-- our_mul_SLD_vs
-- our_mul_CC
-- ref
-- kfcv
-- atc_mc
-- atc_ne
-- doc_feat
+exec: *main_conf
@@ -1204,6 +1204,44 @@ files = [
     {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
 ]

+[[package]]
+name = "win11toast"
+version = "0.32"
+description = "Toast notifications for Windows 10 and 11"
+optional = false
+python-versions = "*"
+files = [
+    {file = "win11toast-0.32-py3-none-any.whl", hash = "sha256:38ecf6625374cbeebce4f3eda20cef0b2c468fedda23d95d883dfcdac98154a6"},
+    {file = "win11toast-0.32.tar.gz", hash = "sha256:640650374285ef51bcad4453a3404f502e5b746e4a7fd7d974064a73ae808e33"},
+]
+
+[package.dependencies]
+winsdk = "*"
+
+[[package]]
+name = "winsdk"
+version = "1.0.0b10"
+description = "Python bindings for the Windows SDK"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "winsdk-1.0.0b10-cp310-cp310-win32.whl", hash = "sha256:90f75c67e166d588a045bcde0117a4631c705904f7af4ac42644479dcf0d8c52"},
+    {file = "winsdk-1.0.0b10-cp310-cp310-win_amd64.whl", hash = "sha256:c3be3fbf692b8888bac8c0712c490c080ab8976649ef01f9f6365947f4e5a8b1"},
+    {file = "winsdk-1.0.0b10-cp310-cp310-win_arm64.whl", hash = "sha256:6ab69dd65d959d94939c21974a33f4f1dfa625106c8784435ecacbd8ff0bf74d"},
+    {file = "winsdk-1.0.0b10-cp311-cp311-win32.whl", hash = "sha256:9ea4fdad9ca8a542198aee3c753ac164b8e2f550d760bb88815095d64750e0f5"},
+    {file = "winsdk-1.0.0b10-cp311-cp311-win_amd64.whl", hash = "sha256:f12e25bbf0a658270203615677520b8170edf500fba11e0f80359c5dbf090676"},
+    {file = "winsdk-1.0.0b10-cp311-cp311-win_arm64.whl", hash = "sha256:e77bce44a9ff151562bd261b2a1a8255e258bb10696d0d31ef63267a27628af1"},
+    {file = "winsdk-1.0.0b10-cp312-cp312-win32.whl", hash = "sha256:775a55a71e05ec2aa262c1fd67d80f270d4186bbdbbee2f43c9c412cf76f0761"},
+    {file = "winsdk-1.0.0b10-cp312-cp312-win_amd64.whl", hash = "sha256:8231ce5f16e1fc88bb7dda0adf35633b5b26101eae3b0799083ca2177f03e4e5"},
+    {file = "winsdk-1.0.0b10-cp312-cp312-win_arm64.whl", hash = "sha256:f4ab469ada19b34ccfc69a148090f98b40a1da1da797b50b9cbba0c090c365a5"},
+    {file = "winsdk-1.0.0b10-cp38-cp38-win32.whl", hash = "sha256:786d6b50e4fcb8af2d701d7400c74e1c3f3ab7766ed1dfd516cdd6688072ea87"},
+    {file = "winsdk-1.0.0b10-cp38-cp38-win_amd64.whl", hash = "sha256:1d4fdd1f79b41b64fedfbc478a29112edf2076e1a61001eccb536c0568510e74"},
+    {file = "winsdk-1.0.0b10-cp39-cp39-win32.whl", hash = "sha256:4f04d3e50eeb8ca5fe4eb2e39785f3fa594199819acdfb23a10aaef4b97699ad"},
+    {file = "winsdk-1.0.0b10-cp39-cp39-win_amd64.whl", hash = "sha256:7948bc5d8a02d73b1db043788d32b2988b8e7e29a25e503c21d34478e630eaf1"},
+    {file = "winsdk-1.0.0b10-cp39-cp39-win_arm64.whl", hash = "sha256:342b1095cbd937865cee962676e279a1fd28896a0680724fcf9c65157e7ebdb7"},
+    {file = "winsdk-1.0.0b10.tar.gz", hash = "sha256:8f39ea759626797449371f857c9085b84bb9f3b6d493dc6525e2cedcb3d15ea2"},
+]
+
 [[package]]
 name = "xlrd"
 version = "2.0.1"

@@ -1223,4 +1261,4 @@ test = ["pytest", "pytest-cov"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "0ce0e6b058900e7db2939e7eb047a1f868c88de67def370c1c1fa0ba532df0b0"
+content-hash = "c98b7510ac055b667340b52e1b0b0777370e68d325d3149cb1fef42b6f1ec50a"
@@ -23,6 +23,7 @@ pytest = "^7.4.0"
 pylance = "^0.5.9"
 pytest-mock = "^3.11.1"
 pytest-cov = "^4.1.0"
+win11toast = "^0.32"

 [tool.pytest.ini_options]
 addopts = "--cov=quacc --capture=tee-sys"
@@ -40,21 +40,22 @@ class Dataset:
         self.n_prevs = n_prevalences

     def __spambase(self):
-        return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test
+        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test

     def __imdb(self):
-        return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test
+        return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test

     def __rcv1(self):
         n_train = 23149
         available_targets = ["CCAT", "GCAT", "MCAT"]

         if self._target is None or self._target not in available_targets:
-            raise ValueError("Invalid target")
+            raise ValueError(f"Invalid target {self._target}")

         dataset = fetch_rcv1()
         target_index = np.where(dataset.target_names == self._target)[0]
-        all_train_d, test_d = dataset.data[:n_train, :], dataset.data[n_train:, :]
+        all_train_d = dataset.data[:n_train, :]
+        test_d = dataset.data[n_train:, :]
         labels = dataset.target[:, target_index].toarray().flatten()
         all_train_l, test_l = labels[:n_train], labels[n_train:]
         all_train = LabelledCollection(all_train_d, all_train_l, classes=[0, 1])

@@ -62,6 +63,21 @@ class Dataset:

         return all_train, test

+    def get_raw(self, validation=True) -> DatasetSample:
+        all_train, test = {
+            "spambase": self.__spambase,
+            "imdb": self.__imdb,
+            "rcv1": self.__rcv1,
+        }[self._name]()
+
+        train, val = all_train, None
+        if validation:
+            train, val = all_train.split_stratified(
+                train_prop=TRAIN_VAL_PROP, random_state=0
+            )
+
+        return DatasetSample(train, val, test)
+
     def get(self) -> List[DatasetSample]:
         all_train, test = {
             "spambase": self.__spambase,
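A hypothetical usage sketch of the new get_raw helper added above. The constructor arguments are guessed from the DATASET_* keys in conf.yaml and may not match the real Dataset signature:

    # Hypothetical call; argument names mirror the conf.yaml keys, check the actual constructor.
    from quacc.dataset import Dataset

    dataset = Dataset("rcv1", target="CCAT", n_prevalences=9)
    sample = dataset.get_raw(validation=True)   # one DatasetSample with train / validation / test
    print(sample.train.prevalence(), sample.test.prevalence())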
@@ -1,72 +0,0 @@
-import yaml
-
-
-defalut_env = {
-    "DATASET_NAME": "rcv1",
-    "DATASET_TARGET": "CCAT",
-    "METRICS": ["acc", "f1"],
-    "COMP_ESTIMATORS": [
-        "our_bin_SLD",
-        "our_bin_SLD_nbvs",
-        "our_bin_SLD_bcts",
-        "our_bin_SLD_ts",
-        "our_bin_SLD_vs",
-        "our_bin_CC",
-        "our_mul_SLD",
-        "our_mul_SLD_nbvs",
-        "our_mul_SLD_bcts",
-        "our_mul_SLD_ts",
-        "our_mul_SLD_vs",
-        "our_mul_CC",
-        "ref",
-        "kfcv",
-        "atc_mc",
-        "atc_ne",
-        "doc_feat",
-        "rca",
-        "rca_star",
-    ],
-    "DATASET_N_PREVS": 9,
-    "OUT_DIR_NAME": "output",
-    "PLOT_DIR_NAME": "plot",
-    "PROTOCOL_N_PREVS": 21,
-    "PROTOCOL_REPEATS": 100,
-    "SAMPLE_SIZE": 1000,
-}
-
-
-class Environ:
-    def __init__(self, **kwargs):
-        self.exec = []
-        self.confs = {}
-        self.__setdict(kwargs)
-
-    def __setdict(self, d):
-        for k, v in d.items():
-            self.__setattr__(k, v)
-
-    def load_conf(self):
-        with open("conf.yaml", "r") as f:
-            confs = yaml.safe_load(f)
-
-        for common in confs["commons"]:
-            name = common["DATASET_NAME"]
-            if "DATASET_TARGET" in common:
-                name += "_" + common["DATASET_TARGET"]
-            for k, d in confs["confs"].items():
-                _k = f"{name}_{k}"
-                self.confs[_k] = common | d
-                self.exec.append(_k)
-
-        if "exec" in confs:
-            if len(confs["exec"]) > 0:
-                self.exec = confs["exec"]
-
-    def __iter__(self):
-        self.load_conf()
-        for _conf in self.exec:
-            if _conf in self.confs:
-                self.__setdict(self.confs[_conf])
-                yield _conf
-
-
-env = Environ(**defalut_env)
@@ -0,0 +1,85 @@
+import yaml
+
+defalut_env = {
+    "DATASET_NAME": "rcv1",
+    "DATASET_TARGET": "CCAT",
+    "METRICS": ["acc", "f1"],
+    "COMP_ESTIMATORS": [],
+    "PLOT_ESTIMATORS": [],
+    "PLOT_STDEV": False,
+    "DATASET_N_PREVS": 9,
+    "OUT_DIR_NAME": "output",
+    "OUT_DIR": None,
+    "PLOT_DIR_NAME": "plot",
+    "PLOT_OUT_DIR": None,
+    "DATASET_DIR_UPDATE": False,
+    "PROTOCOL_N_PREVS": 21,
+    "PROTOCOL_REPEATS": 100,
+    "SAMPLE_SIZE": 1000,
+}
+
+
+class environ:
+    _instance = None
+
+    def __init__(self, **kwargs):
+        self.exec = []
+        self.confs = []
+        self._default = kwargs
+        self.__setdict(kwargs)
+        self.load_conf()
+
+    def __setdict(self, d):
+        for k, v in d.items():
+            self.__setattr__(k, v)
+        if len(self.PLOT_ESTIMATORS) == 0:
+            self.PLOT_ESTIMATORS = self.COMP_ESTIMATORS
+
+    def __class_getitem__(cls, k):
+        env = cls.get()
+        return env.__getattribute__(k)
+
+    def load_conf(self):
+        with open("conf.yaml", "r") as f:
+            confs = yaml.safe_load(f)["exec"]
+
+        _global = confs["global"]
+        _estimators = set()
+        for pc in confs["plot_confs"].values():
+            _estimators = _estimators.union(set(pc["PLOT_ESTIMATORS"]))
+        _global["COMP_ESTIMATORS"] = list(_estimators)
+
+        self.plot_confs = confs["plot_confs"]
+
+        for dataset in confs["datasets"]:
+            self.confs.append(_global | dataset)
+
+    def get_confs(self):
+        for _conf in self.confs:
+            self.__setdict(self._default)
+            self.__setdict(_conf)
+            if "DATASET_TARGET" not in _conf:
+                self.DATASET_TARGET = None
+
+            name = self.DATASET_NAME
+            if self.DATASET_TARGET is not None:
+                name += f"_{self.DATASET_TARGET}"
+            name += f"_{self.DATASET_N_PREVS}prevs"
+
+            yield name
+
+    def get_plot_confs(self):
+        for k, pc in self.plot_confs.items():
+            if "PLOT_ESTIMATORS" in pc:
+                self.PLOT_ESTIMATORS = pc["PLOT_ESTIMATORS"]
+            if "PLOT_STDEV" in pc:
+                self.PLOT_STDEV = pc["PLOT_STDEV"]
+
+            name = self.DATASET_NAME
+            if self.DATASET_TARGET is not None:
+                name += f"_{self.DATASET_TARGET}"
+            name += f"_{k}"
+            yield name
+
+
+env = environ(**defalut_env)
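A rough driver sketch for the new environment module, inferred from the get_confs/get_plot_confs generators added above; it assumes conf.yaml sits in the working directory and the print calls stand in for the real experiment code:

    # Hypothetical driver loop over the configurations declared under exec in conf.yaml.
    from quacc.environment import env

    for conf_name in env.get_confs():             # one pass per entry under exec.datasets
        print(conf_name, env.COMP_ESTIMATORS)     # env attributes now reflect that dataset's config
        for plot_name in env.get_plot_confs():    # one pass per entry under exec.plot_confs
            print(plot_name, env.PLOT_ESTIMATORS, env.PLOT_STDEV)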
@@ -2,8 +2,11 @@ import math
 from abc import abstractmethod

 import numpy as np
+import quapy as qp
 from quapy.data import LabelledCollection
 from quapy.method.aggregative import CC, SLD
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import UPP
 from sklearn.base import BaseEstimator
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_predict

@@ -12,6 +15,24 @@ from quacc.data import ExtendedCollection


 class AccuracyEstimator:
+    def __init__(self):
+        self.fit_score = None
+
+    def _gs_params(self, t_val: LabelledCollection):
+        return {
+            "param_grid": {
+                "classifier__C": np.logspace(-3, 3, 7),
+                "classifier__class_weight": [None, "balanced"],
+                "recalib": [None, "bcts"],
+            },
+            "protocol": UPP(t_val, repeats=1000),
+            "error": qp.error.mae,
+            "refit": False,
+            "timeout": -1,
+            "n_jobs": None,
+            "verbose": True,
+        }
+
     def extend(self, base: LabelledCollection, pred_proba=None) -> ExtendedCollection:
         if not pred_proba:
             pred_proba = self.c_model.predict_proba(base.X)

@@ -26,17 +47,17 @@ class AccuracyEstimator:
     ...


-class MulticlassAccuracyEstimator(AccuracyEstimator):
-    def __init__(self, c_model: BaseEstimator, q_model="SLD", **kwargs):
-        self.c_model = c_model
-        if q_model == "SLD":
-            available_args = ["recalib"]
-            sld_args = {k: v for k, v in kwargs.items() if k in available_args}
-            self.q_model = SLD(LogisticRegression(), **sld_args)
-        elif q_model == "CC":
-            self.q_model = CC(LogisticRegression())
-
+AE = AccuracyEstimator
+
+
+class MulticlassAccuracyEstimator(AccuracyEstimator):
+    def __init__(self, c_model: BaseEstimator, q_model="SLD", gs=False, recalib=None):
+        super().__init__()
+        self.c_model = c_model
+        self._q_model_name = q_model.upper()
         self.e_train = None
+        self.gs = gs
+        self.recalib = recalib

     def fit(self, train: LabelledCollection | ExtendedCollection):
         # check if model is fit

@@ -45,12 +66,26 @@ class MulticlassAccuracyEstimator(AccuracyEstimator):
             pred_prob_train = cross_val_predict(
                 self.c_model, *train.Xy, method="predict_proba"
             )

             self.e_train = ExtendedCollection.extend_collection(train, pred_prob_train)
         else:
             self.e_train = train

-        self.q_model.fit(self.e_train)
+        if self._q_model_name == "SLD":
+            if self.gs:
+                t_train, t_val = self.e_train.split_stratified(0.6, random_state=0)
+                gs_params = self._gs_params(t_val)
+                self.q_model = GridSearchQ(
+                    SLD(LogisticRegression()),
+                    **gs_params,
+                )
+                self.q_model.fit(t_train)
+                self.fit_score = self.q_model.best_score_
+            else:
+                self.q_model = SLD(LogisticRegression(), recalib=self.recalib)
+                self.q_model.fit(self.e_train)
+        elif self._q_model_name == "CC":
+            self.q_model = CC(LogisticRegression())
+            self.q_model.fit(self.e_train)

     def estimate(self, instances, ext=False):
         if not ext:

@@ -62,10 +97,14 @@ class MulticlassAccuracyEstimator(AccuracyEstimator):
         estim_prev = self.q_model.quantify(e_inst)

         return self._check_prevalence_classes(
-            self.e_train.classes_, self.q_model.classes_, estim_prev
+            self.e_train.classes_, self.q_model, estim_prev
         )

-    def _check_prevalence_classes(self, true_classes, estim_classes, estim_prev):
+    def _check_prevalence_classes(self, true_classes, q_model, estim_prev):
+        if isinstance(q_model, GridSearchQ):
+            estim_classes = q_model.best_model().classes_
+        else:
+            estim_classes = q_model.classes_
         for _cls in true_classes:
             if _cls not in estim_classes:
                 estim_prev = np.insert(estim_prev, _cls, [0.0], axis=0)

@@ -73,17 +112,13 @@ class MulticlassAccuracyEstimator(AccuracyEstimator):


 class BinaryQuantifierAccuracyEstimator(AccuracyEstimator):
-    def __init__(self, c_model: BaseEstimator, q_model="SLD", **kwargs):
+    def __init__(self, c_model: BaseEstimator, q_model="SLD", gs=False, recalib=None):
+        super().__init__()
         self.c_model = c_model
-        if q_model == "SLD":
-            available_args = ["recalib"]
-            sld_args = {k: v for k, v in kwargs.items() if k in available_args}
-            self.q_model_0 = SLD(LogisticRegression(), **sld_args)
-            self.q_model_1 = SLD(LogisticRegression(), **sld_args)
-        elif q_model == "CC":
-            self.q_model_0 = CC(LogisticRegression())
-            self.q_model_1 = CC(LogisticRegression())
-
+        self._q_model_name = q_model.upper()
+        self.q_models = []
+        self.gs = gs
+        self.recalib = recalib
         self.e_train = None

     def fit(self, train: LabelledCollection | ExtendedCollection):

@@ -99,10 +134,34 @@ class BinaryQuantifierAccuracyEstimator(AccuracyEstimator):
             self.e_train = train

         self.n_classes = self.e_train.n_classes
-        [e_train_0, e_train_1] = self.e_train.split_by_pred()
-
-        self.q_model_0.fit(e_train_0)
-        self.q_model_1.fit(e_train_1)
+        e_trains = self.e_train.split_by_pred()
+
+        if self._q_model_name == "SLD":
+            fit_scores = []
+            for e_train in e_trains:
+                if self.gs:
+                    t_train, t_val = e_train.split_stratified(0.6, random_state=0)
+                    gs_params = self._gs_params(t_val)
+                    q_model = GridSearchQ(
+                        SLD(LogisticRegression()),
+                        **gs_params,
+                    )
+                    q_model.fit(t_train)
+                    fit_scores.append(q_model.best_score_)
+                    self.q_models.append(q_model)
+                else:
+                    q_model = SLD(LogisticRegression(), recalib=self.recalib)
+                    q_model.fit(e_train)
+                    self.q_models.append(q_model)
+
+            if self.gs:
+                self.fit_score = np.mean(fit_scores)
+
+        elif self._q_model_name == "CC":
+            for e_train in e_trains:
+                q_model = CC(LogisticRegression())
+                q_model.fit(e_train)
+                self.q_models.append(q_model)

     def estimate(self, instances, ext=False):
         # TODO: test

@@ -114,15 +173,13 @@ class BinaryQuantifierAccuracyEstimator(AccuracyEstimator):

         _ncl = int(math.sqrt(self.n_classes))
         s_inst, norms = ExtendedCollection.split_inst_by_pred(_ncl, e_inst)
-        [estim_prev_0, estim_prev_1] = [
+        estim_prevs = [
             self._quantify_helper(inst, norm, q_model)
-            for (inst, norm, q_model) in zip(
-                s_inst, norms, [self.q_model_0, self.q_model_1]
-            )
+            for (inst, norm, q_model) in zip(s_inst, norms, self.q_models)
         ]

         estim_prev = []
-        for prev_row in zip(estim_prev_0, estim_prev_1):
+        for prev_row in zip(*estim_prevs):
             for prev in prev_row:
                 estim_prev.append(prev)
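A hypothetical wiring sketch for the reworked estimator API above (the gs/recalib constructor flags and fit_score attribute come from this diff; the synthetic data stands in for the real rcv1/imdb/spambase collections):

    # Hypothetical usage; synthetic binary data replaces the real datasets.
    import numpy as np
    from quapy.data import LabelledCollection
    from sklearn.linear_model import LogisticRegression
    from quacc.estimator import MulticlassAccuracyEstimator

    X = np.random.rand(1000, 5)
    y = (X[:, 0] > 0.5).astype(int)
    c_model = LogisticRegression().fit(X, y)

    validation = LabelledCollection(X, y, classes=[0, 1])
    estimator = MulticlassAccuracyEstimator(c_model, q_model="SLD", gs=False, recalib="bcts")
    estimator.fit(validation)                 # with gs=True this would run GridSearchQ and set fit_score
    estim_prev = estimator.estimate(X[:100])  # prevalence over the extended (prediction x label) classes
    print(estim_prev)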
@@ -1,23 +1,35 @@
+from functools import wraps
 from statistics import mean

 import numpy as np
 import sklearn.metrics as metrics
 from quapy.data import LabelledCollection
-from quapy.protocol import (
-    AbstractStochasticSeededProtocol,
-    OnLabelledCollectionProtocol,
-)
+from quapy.protocol import AbstractStochasticSeededProtocol
+from scipy.sparse import issparse
 from sklearn.base import BaseEstimator
 from sklearn.model_selection import cross_validate

-import elsahar19_rca.rca as rca
-import garg22_ATC.ATC_helper as atc
-import guillory21_doc.doc as doc
-import jiang18_trustscore.trustscore as trustscore
+import baselines.atc as atc
+import baselines.doc as doc
+import baselines.impweight as iw
+import baselines.rca as rcalib

 from .report import EvaluationReport

+_baselines = {}
+
+
+def baseline(func):
+    @wraps(func)
+    def wrapper(c_model, validation, protocol):
+        return func(c_model, validation, protocol)
+
+    _baselines[func.__name__] = wrapper
+
+    return wrapper
+
+
+@baseline
 def kfcv(
     c_model: BaseEstimator,
     validation: LabelledCollection,

@@ -31,9 +43,6 @@ def kfcv(
     acc_score = mean(scores["test_accuracy"])
     f1_score = mean(scores["test_f1_macro"])
-
-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="kfcv")
     for test in protocol():
         test_preds = c_model_predict(test.X)

@@ -50,12 +59,12 @@ def kfcv(
     return report


-def reference(
+@baseline
+def ref(
     c_model: BaseEstimator,
     validation: LabelledCollection,
     protocol: AbstractStochasticSeededProtocol,
 ):
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
     c_model_predict = getattr(c_model, "predict_proba")
     report = EvaluationReport(name="ref")
     for test in protocol():

@@ -70,6 +79,7 @@ def reference(
     return report


+@baseline
 def atc_mc(
     c_model: BaseEstimator,
     validation: LabelledCollection,

@@ -86,9 +96,6 @@ def atc_mc(
     val_preds = np.argmax(val_probs, axis=-1)
     _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)
-
-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="atc_mc")
     for test in protocol():
         ## Load OOD test data probs

@@ -110,6 +117,7 @@ def atc_mc(
     return report


+@baseline
 def atc_ne(
     c_model: BaseEstimator,
     validation: LabelledCollection,

@@ -126,9 +134,6 @@ def atc_ne(
     val_preds = np.argmax(val_probs, axis=-1)
     _, atc_thres = atc.find_ATC_threshold(val_scores, val_labels == val_preds)
-
-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="atc_ne")
     for test in protocol():
         ## Load OOD test data probs

@@ -150,22 +155,7 @@ def atc_ne(
     return report


-def trust_score(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    test: LabelledCollection,
-    predict_method="predict",
-):
-    c_model_predict = getattr(c_model, predict_method)
-
-    test_pred = c_model_predict(test.X)
-
-    trust_model = trustscore.TrustScore()
-    trust_model.fit(validation.X, validation.y)
-
-    return trust_model.get_score(test.X, test_pred)
-
-
+@baseline
 def doc_feat(
     c_model: BaseEstimator,
     validation: LabelledCollection,

@@ -179,9 +169,6 @@ def doc_feat(
     val_preds = np.argmax(val_probs, axis=-1)
     v1acc = np.mean(val_preds == val_labels) * 100
-
-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="doc_feat")
     for test in protocol():
         test_probs = c_model_predict(test.X)

@@ -194,26 +181,25 @@ def doc_feat(
     return report


-def rca_score(
+@baseline
+def rca(
     c_model: BaseEstimator,
     validation: LabelledCollection,
     protocol: AbstractStochasticSeededProtocol,
     predict_method="predict",
 ):
+    """elsahar19"""
     c_model_predict = getattr(c_model, predict_method)
     val_pred1 = c_model_predict(validation.X)

-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="rca")
     for test in protocol():
         try:
             test_pred = c_model_predict(test.X)
-            c_model2 = rca.clone_fit(c_model, test.X, test_pred)
+            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
             c_model2_predict = getattr(c_model2, predict_method)
             val_pred2 = c_model2_predict(validation.X)
-            rca_score = 1.0 - rca.get_score(val_pred1, val_pred2, validation.y)
+            rca_score = 1.0 - rcalib.get_score(val_pred1, val_pred2, validation.y)
             meta_score = abs(rca_score - metrics.accuracy_score(test.y, test_pred))
             report.append_row(test.prevalence(), acc=meta_score, acc_score=rca_score)
         except ValueError:

@@ -224,32 +210,33 @@ def rca_score(
     return report


-def rca_star_score(
+@baseline
+def rca_star(
     c_model: BaseEstimator,
     validation: LabelledCollection,
     protocol: AbstractStochasticSeededProtocol,
     predict_method="predict",
 ):
+    """elsahar19"""
     c_model_predict = getattr(c_model, predict_method)
     validation1, validation2 = validation.split_stratified(
         train_prop=0.5, random_state=0
     )
     val1_pred = c_model_predict(validation1.X)
-    c_model1 = rca.clone_fit(c_model, validation1.X, val1_pred)
+    c_model1 = rcalib.clone_fit(c_model, validation1.X, val1_pred)
     c_model1_predict = getattr(c_model1, predict_method)
     val2_pred1 = c_model1_predict(validation2.X)

-    # ensure that the protocol returns a LabelledCollection for each iteration
-    protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")

     report = EvaluationReport(name="rca_star")
     for test in protocol():
         try:
             test_pred = c_model_predict(test.X)
-            c_model2 = rca.clone_fit(c_model, test.X, test_pred)
+            c_model2 = rcalib.clone_fit(c_model, test.X, test_pred)
             c_model2_predict = getattr(c_model2, predict_method)
             val2_pred2 = c_model2_predict(validation2.X)
-            rca_star_score = 1.0 - rca.get_score(val2_pred1, val2_pred2, validation2.y)
+            rca_star_score = 1.0 - rcalib.get_score(
+                val2_pred1, val2_pred2, validation2.y
+            )
             meta_score = abs(rca_star_score - metrics.accuracy_score(test.y, test_pred))
             report.append_row(
                 test.prevalence(), acc=meta_score, acc_score=rca_star_score

@@ -260,3 +247,52 @@ def rca_star_score(
             )

     return report
+
+
+@baseline
+def logreg(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+    predict_method="predict",
+):
+    c_model_predict = getattr(c_model, predict_method)
+
+    val_preds = c_model_predict(validation.X)
+
+    report = EvaluationReport(name="logreg")
+    for test in protocol():
+        wx = iw.logreg(validation.X, validation.y, test.X)
+        test_preds = c_model_predict(test.X)
+        estim_acc = iw.get_acc(val_preds, validation.y, wx)
+        true_acc = metrics.accuracy_score(test.y, test_preds)
+        meta_score = abs(estim_acc - true_acc)
+        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
+
+    return report
+
+
+@baseline
+def kdex2(
+    c_model: BaseEstimator,
+    validation: LabelledCollection,
+    protocol: AbstractStochasticSeededProtocol,
+    predict_method="predict",
+):
+    c_model_predict = getattr(c_model, predict_method)
+
+    val_preds = c_model_predict(validation.X)
+    log_likelihood_val = iw.kdex2_lltr(validation.X)
+    Xval = validation.X.toarray() if issparse(validation.X) else validation.X
+
+    report = EvaluationReport(name="kdex2")
+    for test in protocol():
+        Xte = test.X.toarray() if issparse(test.X) else test.X
+        wx = iw.kdex2_weights(Xval, Xte, log_likelihood_val)
+        test_preds = c_model_predict(Xte)
+        estim_acc = iw.get_acc(val_preds, validation.y, wx)
+        true_acc = metrics.accuracy_score(test.y, test_preds)
+        meta_score = abs(estim_acc - true_acc)
+        report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
+
+    return report
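The @baseline decorator above builds a name-to-function registry (_baselines) that the comparison driver below merges with the analogous _methods registry. A minimal, generic sketch of that registry-decorator pattern (the names here are illustrative only, not part of the codebase):

    # Generic registry-decorator sketch; 'registry', 'register' and 'demo' are made-up names.
    from functools import wraps

    registry = {}

    def register(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)
        registry[func.__name__] = wrapper   # registration happens at import time
        return wrapper

    @register
    def demo(x):
        return x * 2

    print(registry["demo"](21))  # 42 -- callers can look functions up by name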
@@ -1,17 +1,18 @@
+import logging as log
 import multiprocessing
 import time
-import traceback
 from typing import List

+import numpy as np
 import pandas as pd
 import quapy as qp
 from quapy.protocol import APP
 from sklearn.linear_model import LogisticRegression

 from quacc.dataset import Dataset
-from quacc.environ import env
+from quacc.environment import env
 from quacc.evaluation import baseline, method
-from quacc.evaluation.report import DatasetReport, EvaluationReport
+from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport

 qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE

@@ -19,27 +20,7 @@ pd.set_option("display.float_format", "{:.4f}".format)


 class CompEstimator:
-    __dict = {
-        "our_bin_SLD": method.evaluate_bin_sld,
-        "our_mul_SLD": method.evaluate_mul_sld,
-        "our_bin_SLD_nbvs": method.evaluate_bin_sld_nbvs,
-        "our_mul_SLD_nbvs": method.evaluate_mul_sld_nbvs,
-        "our_bin_SLD_bcts": method.evaluate_bin_sld_bcts,
-        "our_mul_SLD_bcts": method.evaluate_mul_sld_bcts,
-        "our_bin_SLD_ts": method.evaluate_bin_sld_ts,
-        "our_mul_SLD_ts": method.evaluate_mul_sld_ts,
-        "our_bin_SLD_vs": method.evaluate_bin_sld_vs,
-        "our_mul_SLD_vs": method.evaluate_mul_sld_vs,
-        "our_bin_CC": method.evaluate_bin_cc,
-        "our_mul_CC": method.evaluate_mul_cc,
-        "ref": baseline.reference,
-        "kfcv": baseline.kfcv,
-        "atc_mc": baseline.atc_mc,
-        "atc_ne": baseline.atc_ne,
-        "doc_feat": baseline.doc_feat,
-        "rca": baseline.rca_score,
-        "rca_star": baseline.rca_star_score,
-    }
+    __dict = method._methods | baseline._baselines

     def __class_getitem__(cls, e: str | List[str]):
         if isinstance(e, str):

@@ -48,30 +29,34 @@ class CompEstimator:
         except KeyError:
             raise KeyError(f"Invalid estimator: estimator {e} does not exist")
         elif isinstance(e, list):
-            try:
-                return [cls.__dict[est] for est in e]
-            except KeyError as ke:
+            _subtr = [k for k in e if k not in cls.__dict]
+            if len(_subtr) > 0:
                 raise KeyError(
-                    f"Invalid estimator: estimator {ke.args[0]} does not exist"
+                    f"Invalid estimator: estimator {_subtr[0]} does not exist"
                 )

+            return [fun for k, fun in cls.__dict.items() if k in e]
+

 CE = CompEstimator


-def fit_and_estimate(_estimate, train, validation, test):
+def fit_and_estimate(_estimate, train, validation, test, _env=None):
+    _env = env if _env is None else _env
     model = LogisticRegression()

     model.fit(*train.Xy)
     protocol = APP(
-        test, n_prevalences=env.PROTOCOL_N_PREVS, repeats=env.PROTOCOL_REPEATS
+        test,
+        n_prevalences=_env.PROTOCOL_N_PREVS,
+        repeats=_env.PROTOCOL_REPEATS,
+        return_type="labelled_collection",
     )
     start = time.time()
     try:
         result = _estimate(model, validation, protocol)
     except Exception as e:
-        print(f"Method {_estimate.__name__} failed.")
-        traceback(e)
+        log.error(f"Method {_estimate.__name__} failed. Exception: {e}")
         return {
             "name": _estimate.__name__,
             "result": None,

@@ -79,7 +64,7 @@ def fit_and_estimate(_estimate, train, validation, test):
         }

     end = time.time()
-    print(f"{_estimate.__name__}: {end-start:.2f}s")
+    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")

     return {
         "name": _estimate.__name__,

@@ -91,13 +76,17 @@ def fit_and_estimate(_estimate, train, validation, test):
 def evaluate_comparison(
     dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
 ) -> EvaluationReport:
+    # with multiprocessing.Pool(1) as pool:
     with multiprocessing.Pool(len(estimators)) as pool:
         dr = DatasetReport(dataset.name)
+        log.info(f"dataset {dataset.name}")
         for d in dataset():
-            print(f"train prev.: {d.train_prev}")
-            start = time.time()
+            log.info(f"train prev.: {np.around(d.train_prev, decimals=2)}")
+            tstart = time.time()
             tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
-            results = [pool.apply_async(fit_and_estimate, t) for t in tasks]
+            results = [
+                pool.apply_async(fit_and_estimate, t, {"_env": env}) for t in tasks
+            ]

             results_got = []
             for _r in results:

@@ -106,19 +95,22 @@ def evaluate_comparison(
                     if r["result"] is not None:
                         results_got.append(r)
                 except Exception as e:
-                    print(e)
+                    log.error(
+                        f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
+                    )

-            er = EvaluationReport.combine_reports(
-                *[r["result"] for r in results_got],
+            tend = time.time()
+            times = {r["name"]: r["time"] for r in results_got}
+            times["tot"] = tend - tstart
+            log.info(
+                f"Dataset sample {d.train[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
+            )
+            dr += CompReport(
+                [r["result"] for r in results_got],
                 name=dataset.name,
                 train_prev=d.train_prev,
                 valid_prev=d.validation_prev,
+                times=times,
             )
-            times = {r["name"]: r["time"] for r in results_got}
-            end = time.time()
-            times["tot"] = end - start
-            er.times = times
-            dr.add(er)
-            print()

     return dr
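A short note on the _env change above: the estimators run inside multiprocessing.Pool workers, so the driver now passes the loaded environment to each task explicitly instead of relying on every child process re-importing module-level state. A hypothetical minimal illustration of the same idea (the worker and payload names are made up):

    # Illustrative only: hand shared config to pool workers as an explicit argument.
    import multiprocessing

    def worker(x, _env=None):
        cfg = {"REPEATS": 10} if _env is None else _env   # fall back to a default config
        return x * cfg["REPEATS"]

    if __name__ == "__main__":
        env = {"REPEATS": 100}
        with multiprocessing.Pool(2) as pool:
            results = [pool.apply_async(worker, (i,), {"_env": env}) for i in range(3)]
            print([r.get() for r in results])  # [0, 100, 200]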
@ -1,10 +1,9 @@
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sklearn.metrics as metrics
|
import sklearn.metrics as metrics
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.protocol import (
|
from quapy.protocol import AbstractStochasticSeededProtocol
|
||||||
AbstractStochasticSeededProtocol,
|
|
||||||
OnLabelledCollectionProtocol,
|
|
||||||
)
|
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
|
|
||||||
import quacc.error as error
|
import quacc.error as error
|
||||||
|
@ -16,14 +15,23 @@ from ..estimator import (
|
||||||
MulticlassAccuracyEstimator,
|
MulticlassAccuracyEstimator,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_methods = {}
|
||||||
|
|
||||||
|
|
||||||
|
def method(func):
|
||||||
|
@wraps(func)
|
||||||
|
def wrapper(c_model, validation, protocol):
|
||||||
|
return func(c_model, validation, protocol)
|
||||||
|
|
||||||
|
_methods[func.__name__] = wrapper
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
def estimate(
|
def estimate(
|
||||||
estimator: AccuracyEstimator,
|
estimator: AccuracyEstimator,
|
||||||
protocol: AbstractStochasticSeededProtocol,
|
protocol: AbstractStochasticSeededProtocol,
|
||||||
):
|
):
|
||||||
# ensure that the protocol returns a LabelledCollection for each iteration
|
|
||||||
protocol.collator = OnLabelledCollectionProtocol.get_collator("labelled_collection")
|
|
||||||
|
|
||||||
base_prevs, true_prevs, estim_prevs, pred_probas, labels = [], [], [], [], []
|
base_prevs, true_prevs, estim_prevs, pred_probas, labels = [], [], [], [], []
|
||||||
for sample in protocol():
|
for sample in protocol():
|
||||||
e_sample, pred_proba = estimator.extend(sample)
|
e_sample, pred_proba = estimator.extend(sample)
|
||||||
|
@ -61,6 +69,8 @@ def evaluation_report(
|
||||||
f1=abs(error.f1(true_prev) - f1_score),
|
f1=abs(error.f1(true_prev) - f1_score),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
report.fit_score = estimator.fit_score
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
|
@@ -75,105 +85,51 @@ def evaluate(
     estimator: AccuracyEstimator = {
         "bin": BinaryQuantifierAccuracyEstimator,
         "mul": MulticlassAccuracyEstimator,
-    }[method](c_model, q_model=q_model, **kwargs)
+    }[method](c_model, q_model=q_model.upper(), **kwargs)
     estimator.fit(validation)
     _method = f"{method}_{q_model}"
-    for k, v in kwargs.items():
-        _method += f"_{v}"
+    if "recalib" in kwargs:
+        _method += f"_{kwargs['recalib']}"
+    if ("gs", True) in kwargs.items():
+        _method += "_gs"
     return evaluation_report(estimator, protocol, _method)
 
 
-def evaluate_bin_sld(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "SLD")
+@method
+def bin_sld(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "bin", "sld")
 
 
-def evaluate_mul_sld(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "SLD")
+@method
+def mul_sld(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "mul", "sld")
 
 
-def evaluate_bin_sld_nbvs(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "SLD", recalib="nbvs")
+@method
+def bin_sld_bcts(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "bin", "sld", recalib="bcts")
 
 
-def evaluate_mul_sld_nbvs(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "SLD", recalib="nbvs")
+@method
+def mul_sld_bcts(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "mul", "sld", recalib="bcts")
 
 
-def evaluate_bin_sld_bcts(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "SLD", recalib="bcts")
+@method
+def bin_sld_gs(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "bin", "sld", gs=True)
 
 
-def evaluate_mul_sld_bcts(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "SLD", recalib="bcts")
+@method
+def mul_sld_gs(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "mul", "sld", gs=True)
 
 
-def evaluate_bin_sld_ts(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "SLD", recalib="ts")
+@method
+def bin_cc(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "bin", "cc")
 
 
-def evaluate_mul_sld_ts(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "SLD", recalib="ts")
-
-
-def evaluate_bin_sld_vs(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "SLD", recalib="vs")
-
-
-def evaluate_mul_sld_vs(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "SLD", recalib="vs")
-
-
-def evaluate_bin_cc(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "bin", "CC")
-
-
-def evaluate_mul_cc(
-    c_model: BaseEstimator,
-    validation: LabelledCollection,
-    protocol: AbstractStochasticSeededProtocol,
-) -> EvaluationReport:
-    return evaluate(c_model, validation, protocol, "mul", "CC")
+@method
+def mul_cc(c_model, validation, protocol) -> EvaluationReport:
+    return evaluate(c_model, validation, protocol, "mul", "cc")
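The rewritten evaluate above derives the report column name from its options: the base label is "{method}_{q_model}", a recalibration suffix is appended when recalib is passed, and "_gs" when grid search is enabled. A small stand-alone sketch of that naming rule (it approximates the kwargs check with dict.get; nothing here is the project's actual API):

def method_label(method: str, q_model: str, **kwargs) -> str:
    # e.g. ("bin", "sld", recalib="bcts") -> "bin_sld_bcts"
    label = f"{method}_{q_model}"
    if "recalib" in kwargs:
        label += f"_{kwargs['recalib']}"
    if kwargs.get("gs", False):
        label += "_gs"
    return label


print(method_label("bin", "sld", recalib="bcts"))  # bin_sld_bcts
print(method_label("mul", "sld", gs=True))         # mul_sld_gs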
@@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd
 
 from quacc import plot
-from quacc.environ import env
+from quacc.environment import env
 from quacc.utils import fmt_line_md
@@ -13,191 +13,399 @@ class EvaluationReport:
     def __init__(self, name=None):
         self._prevs = []
         self._dict = {}
-        self._g_prevs = None
-        self._g_dict = None
+        self.fit_score = None
         self.name = name if name is not None else "default"
-        self.times = {}
-        self.train_prev = None
-        self.valid_prev = None
-        self.target = "default"
 
-    def append_row(self, base: np.ndarray | Tuple, **row):
-        if isinstance(base, np.ndarray):
-            base = tuple(base.tolist())
-        self._prevs.append(base)
+    def append_row(self, basep: np.ndarray | Tuple, **row):
+        bp = basep[1]
+        self._prevs.append(bp)
         for k, v in row.items():
-            if (k, self.name) in self._dict:
-                self._dict[(k, self.name)].append(v)
-            else:
-                self._dict[(k, self.name)] = [v]
-        self._g_prevs = None
+            if k not in self._dict:
+                self._dict[k] = {}
+            if bp not in self._dict[k]:
+                self._dict[k][bp] = []
+            self._dict[k][bp] = np.append(self._dict[k][bp], [v])
 
     @property
     def columns(self):
         return self._dict.keys()
 
-    def group_by_prevs(self, metric: str = None):
-        if self._g_dict is None:
-            self._g_prevs = []
-            self._g_dict = {k: [] for k in self._dict.keys()}
+    @property
+    def prevs(self):
+        return np.sort(np.unique([list(self._dict[_k].keys()) for _k in self._dict]))
 
-            for col, vals in self._dict.items():
-                col_grouped = {}
-                for bp, v in zip(self._prevs, vals):
-                    if bp not in col_grouped:
-                        col_grouped[bp] = []
-                    col_grouped[bp].append(v)
-
-                self._g_dict[col] = [
-                    vs
-                    for bp, vs in sorted(col_grouped.items(), key=lambda cg: cg[0][1])
-                ]
-
-            self._g_prevs = sorted(
-                [(p0, p1) for [p0, p1] in np.unique(self._prevs, axis=0).tolist()],
-                key=lambda bp: bp[1],
-            )
+    # def group_by_prevs(self, metric: str = None, estimators: List[str] = None):
+    #     if self._g_dict is None:
+    #         self._g_prevs = []
+    #         self._g_dict = {k: [] for k in self._dict.keys()}
+
+    #         for col, vals in self._dict.items():
+    #             col_grouped = {}
+    #             for bp, v in zip(self._prevs, vals):
+    #                 if bp not in col_grouped:
+    #                     col_grouped[bp] = []
+    #                 col_grouped[bp].append(v)
+
+    #             self._g_dict[col] = [
+    #                 vs
+    #                 for bp, vs in sorted(col_grouped.items(), key=lambda cg: cg[0][1])
+    #             ]
+
+    #         self._g_prevs = sorted(
+    #             [(p0, p1) for [p0, p1] in np.unique(self._prevs, axis=0).tolist()],
+    #             key=lambda bp: bp[1],
+    #         )
+
+    #     fg_dict = _filter_dict(self._g_dict, metric, estimators)
+    #     return self._g_prevs, fg_dict
+
+    # def merge(self, other):
+    #     if not all(v1 == v2 for v1, v2 in zip(self._prevs, other._prevs)):
+    #         raise ValueError("other has not same base prevalences of self")
+
+    #     inters_keys = set(self._dict.keys()).intersection(set(other._dict.keys()))
+    #     if len(inters_keys) > 0:
+    #         raise ValueError(f"self and other have matching keys {str(inters_keys)}.")
+
+    #     report = EvaluationReport()
+    #     report._prevs = self._prevs
+    #     report._dict = self._dict | other._dict
+    #     return report
+
+
-        # last_end = 0
-        # for ind, bp in enumerate(self._prevs):
-        #     if ind < (len(self._prevs) - 1) and bp == self._prevs[ind + 1]:
-        #         continue
-
-        #     self._g_prevs.append(bp)
-        #     for col in self._dict.keys():
-        #         self._g_dict[col].append(
-        #             stats.mean(self._dict[col][last_end : ind + 1])
-        #         )
-
-        #     last_end = ind + 1
-
-        filtered_g_dict = self._g_dict
-        if metric is not None:
-            filtered_g_dict = {
-                c1: ls for ((c0, c1), ls) in self._g_dict.items() if c0 == metric
-            }
-
-        return self._g_prevs, filtered_g_dict
-
-    def avg_by_prevs(self, metric: str = None):
-        g_prevs, g_dict = self.group_by_prevs(metric=metric)
-
-        a_dict = {}
-        for col, vals in g_dict.items():
-            a_dict[col] = [np.mean(vs) for vs in vals]
-
-        return g_prevs, a_dict
-
-    def avg_all(self, metric: str = None):
-        f_dict = self._dict
-        if metric is not None:
-            f_dict = {c1: ls for ((c0, c1), ls) in self._dict.items() if c0 == metric}
-
-        a_dict = {}
-        for col, vals in f_dict.items():
-            a_dict[col] = [np.mean(vals)]
-
-        return a_dict
+class CompReport:
+    def __init__(
+        self,
+        reports: List[EvaluationReport],
+        name="default",
+        train_prev=None,
+        valid_prev=None,
+        times=None,
+    ):
+        all_prevs = np.array([er.prevs for er in reports])
+        if not np.all(all_prevs == all_prevs[0, :], axis=0).all():
+            raise ValueError(
+                "Not all evaluation reports have the same base prevalences"
+            )
+        uq_names, name_c = np.unique([er.name for er in reports], return_counts=True)
+        if np.sum(name_c) > uq_names.shape[0]:
+            _matching = uq_names[[c > 1 for c in name_c]]
+            raise ValueError(
+                f"Evaluation reports have matching names: {_matching.tolist()}."
+            )
+
+        all_dicts = [{(k, er.name): v for k, v in er._dict.items()} for er in reports]
+        self._dict = {}
+        for d in all_dicts:
+            self._dict = self._dict | d
+
+        self.fit_scores = {
+            er.name: er.fit_score for er in reports if er.fit_score is not None
+        }
+        self.train_prev = train_prev
+        self.valid_prev = valid_prev
+        self.times = times
+
+    @property
+    def prevs(self):
+        return np.sort(np.unique([list(self._dict[_k].keys()) for _k in self._dict]))
+
+    @property
+    def cprevs(self):
+        return np.around([(1.0 - p, p) for p in self.prevs], decimals=2)
+
+    def data(self, metric: str = None, estimators: List[str] = None) -> dict:
+        f_dict = self._dict.copy()
+        if metric is not None:
+            f_dict = {(c0, c1): ls for ((c0, c1), ls) in f_dict.items() if c0 == metric}
+        if estimators is not None:
+            f_dict = {
+                (c0, c1): ls for ((c0, c1), ls) in f_dict.items() if c1 in estimators
+            }
+        if (metric, estimators) != (None, None):
+            f_dict = {c1: ls for ((c0, c1), ls) in f_dict.items()}
+
+        return f_dict
+
+    def group_by_shift(self, metric: str = None, estimators: List[str] = None):
+        f_dict = self.data(metric=metric, estimators=estimators)
+        shift_prevs = np.around(
+            np.absolute(self.prevs - self.train_prev[1]), decimals=2
+        )
+        shift_dict = {col: {sp: [] for sp in shift_prevs} for col in f_dict.keys()}
+        for col, vals in f_dict.items():
+            for sp, bp in zip(shift_prevs, self.prevs):
+                shift_dict[col][sp] = np.concatenate(
+                    [shift_dict[col][sp], f_dict[col][bp]]
+                )
+
+        return np.sort(np.unique(shift_prevs)), shift_dict
-    def get_dataframe(self, metric="acc"):
-        g_prevs, g_dict = self.avg_by_prevs(metric=metric)
-        a_dict = self.avg_all(metric=metric)
-        for col in g_dict.keys():
-            g_dict[col].extend(a_dict[col])
-        return pd.DataFrame(
-            g_dict,
-            index=g_prevs + ["tot"],
-            columns=g_dict.keys(),
-        )
-
-    def get_plot(self, mode="delta", metric="acc") -> Path:
-        if mode == "delta":
-            g_prevs, g_dict = self.group_by_prevs(metric=metric)
-            return plot.plot_delta(
-                g_prevs,
-                g_dict,
-                metric=metric,
-                name=self.name,
-                train_prev=self.train_prev,
-            )
-        elif mode == "diagonal":
-            _, g_dict = self.avg_by_prevs(metric=metric + "_score")
-            f_dict = {k: v for k, v in g_dict.items() if k != "ref"}
-            referece = g_dict["ref"]
-            return plot.plot_diagonal(
-                referece,
-                f_dict,
-                metric=metric,
-                name=self.name,
-                train_prev=self.train_prev,
-            )
-        elif mode == "shift":
-            g_prevs, g_dict = self.avg_by_prevs(metric=metric)
-            return plot.plot_shift(
-                g_prevs,
-                g_dict,
-                metric=metric,
-                name=self.name,
-                train_prev=self.train_prev,
-            )
+    def avg_by_prevs(self, metric: str = None, estimators: List[str] = None):
+        f_dict = self.data(metric=metric, estimators=estimators)
+        return {
+            col: np.array([np.mean(vals[bp]) for bp in self.prevs])
+            for col, vals in f_dict.items()
+        }
+
+    def stdev_by_prevs(self, metric: str = None, estimators: List[str] = None):
+        f_dict = self.data(metric=metric, estimators=estimators)
+        return {
+            col: np.array([np.std(vals[bp]) for bp in self.prevs])
+            for col, vals in f_dict.items()
+        }
+
+    def avg_all(self, metric: str = None, estimators: List[str] = None):
+        f_dict = self.data(metric=metric, estimators=estimators)
+        return {
+            col: [np.mean(np.concatenate(list(vals.values())))]
+            for col, vals in f_dict.items()
+        }
+
+    def get_dataframe(self, metric="acc", estimators=None):
+        avg_dict = self.avg_by_prevs(metric=metric, estimators=estimators)
+        all_dict = self.avg_all(metric=metric, estimators=estimators)
+        for col in avg_dict.keys():
+            avg_dict[col] = np.append(avg_dict[col], all_dict[col])
+        return pd.DataFrame(
+            avg_dict,
+            index=self.prevs.tolist() + ["tot"],
+            columns=avg_dict.keys(),
+        )
+
+    def get_plots(
+        self,
+        modes=["delta", "diagonal", "shift"],
+        metric="acc",
+        estimators=None,
+        conf="default",
+        stdev=False,
+    ) -> Path:
+        pps = []
+        for mode in modes:
+            pp = []
+            if mode == "delta":
+                f_dict = self.avg_by_prevs(metric=metric, estimators=estimators)
+                _pp0 = plot.plot_delta(
+                    self.cprevs,
+                    f_dict,
+                    metric=metric,
+                    name=conf,
+                    train_prev=self.train_prev,
+                    fit_scores=self.fit_scores,
+                )
+                pp = [(mode, _pp0)]
+                if stdev:
+                    fs_dict = self.stdev_by_prevs(metric=metric, estimators=estimators)
+                    _pp1 = plot.plot_delta(
+                        self.cprevs,
+                        f_dict,
+                        metric=metric,
+                        name=conf,
+                        train_prev=self.train_prev,
+                        fit_scores=self.fit_scores,
+                        stdevs=fs_dict,
+                    )
+                    pp.append((f"{mode}_stdev", _pp1))
+            elif mode == "diagonal":
+                f_dict = {
+                    col: np.concatenate([vals[bp] for bp in self.prevs])
+                    for col, vals in self.data(
+                        metric=metric + "_score", estimators=estimators
+                    ).items()
+                }
+                reference = f_dict["ref"]
+                f_dict = {k: v for k, v in f_dict.items() if k != "ref"}
+                _pp0 = plot.plot_diagonal(
+                    reference,
+                    f_dict,
+                    metric=metric,
+                    name=conf,
+                    train_prev=self.train_prev,
+                )
+                pp = [(mode, _pp0)]
+            elif mode == "shift":
+                s_prevs, s_dict = self.group_by_shift(
+                    metric=metric, estimators=estimators
+                )
+                _pp0 = plot.plot_shift(
+                    np.around([(1.0 - p, p) for p in s_prevs], decimals=2),
+                    {
+                        col: np.array([np.mean(vals[sp]) for sp in s_prevs])
+                        for col, vals in s_dict.items()
+                    },
+                    metric=metric,
+                    name=conf,
+                    train_prev=self.train_prev,
+                    fit_scores=self.fit_scores,
+                )
+                pp = [(mode, _pp0)]
+
+            pps.extend(pp)
+
+        return pps
+
-    def to_md(self, *metrics):
-        res = ""
+    def to_md(self, conf="default", metric="acc", estimators=None, stdev=False):
+        res = f"## {int(np.around(self.train_prev, decimals=2)[1]*100)}% positives\n"
         res += fmt_line_md(f"train: {str(self.train_prev)}")
         res += fmt_line_md(f"validation: {str(self.valid_prev)}")
         for k, v in self.times.items():
             res += fmt_line_md(f"{k}: {v:.3f}s")
         res += "\n"
-        for m in metrics:
-            res += self.get_dataframe(metric=m).to_html() + "\n\n"
-            op_delta = self.get_plot(mode="delta", metric=m)
-            res += f"![plot_delta]({str(op_delta.relative_to(env.OUT_DIR))})\n"
-            op_diag = self.get_plot(mode="diagonal", metric=m)
-            res += f"![plot_diagonal]({str(op_diag.relative_to(env.OUT_DIR))})\n"
-            op_shift = self.get_plot(mode="shift", metric=m)
-            res += f"![plot_shift]({str(op_shift.relative_to(env.OUT_DIR))})\n"
+        res += (
+            self.get_dataframe(metric=metric, estimators=estimators).to_html() + "\n\n"
+        )
+        plot_modes = ["delta", "diagonal", "shift"]
+        for mode, op in self.get_plots(
+            modes=plot_modes,
+            metric=metric,
+            estimators=estimators,
+            conf=conf,
+            stdev=stdev,
+        ):
+            res += f"![plot_{mode}]({op.relative_to(env.OUT_DIR).as_posix()})\n"
 
         return res
 
-    def merge(self, other):
-        if not all(v1 == v2 for v1, v2 in zip(self._prevs, other._prevs)):
-            raise ValueError("other has not same base prevalences of self")
-
-        inters_keys = set(self._dict.keys()).intersection(set(other._dict.keys()))
-        if len(inters_keys) > 0:
-            raise ValueError(f"self and other have matching keys {str(inters_keys)}.")
-
-        report = EvaluationReport()
-        report._prevs = self._prevs
-        report._dict = self._dict | other._dict
-        return report
-
-    @staticmethod
-    def combine_reports(*args, name="default", train_prev=None, valid_prev=None):
-        er = args[0]
-        for r in args[1:]:
-            er = er.merge(r)
-
-        er.name = name
-        er.train_prev = train_prev
-        er.valid_prev = valid_prev
-        return er
-
 
 class DatasetReport:
     def __init__(self, name):
         self.name = name
-        self.ers: List[EvaluationReport] = []
+        self._dict = None
+        self.crs: List[CompReport] = []
 
-    def add(self, er: EvaluationReport):
-        self.ers.append(er)
+    @property
+    def cprevs(self):
+        return np.around([(1.0 - p, p) for p in self.prevs], decimals=2)
 
-    def to_md(self, *metrics):
-        res = f"{self.name}\n\n"
-        for er in self.ers:
-            res += f"{er.to_md(*metrics)}\n\n"
+    def add(self, cr: CompReport):
+        self.crs.append(cr)
+
+        if self._dict is None:
+            self.prevs = cr.prevs
+            self._dict = {
+                col: {bp: vals[bp] for bp in self.prevs}
+                for col, vals in cr.data().items()
+            }
+            self.s_prevs, self.s_dict = cr.group_by_shift()
+            self.fit_scores = {k: [score] for k, score in cr.fit_scores.items()}
+            return
+
+        cr_dict = cr.data()
+        both_prevs = np.array([self.prevs, cr.prevs])
+        if not np.all(both_prevs == both_prevs[0, :]).all():
+            raise ValueError("Comp report has incompatible base prevalences")
+
+        for col, vals in cr_dict.items():
+            if col not in self._dict:
+                self._dict[col] = {}
+            for bp in self.prevs:
+                if bp not in self._dict[col]:
+                    self._dict[col][bp] = []
+                self._dict[col][bp] = np.concatenate(
+                    [self._dict[col][bp], cr_dict[col][bp]]
+                )
+
+        cr_s_prevs, cr_s_dict = cr.group_by_shift()
+        self.s_prevs = np.sort(np.unique(np.concatenate([self.s_prevs, cr_s_prevs])))
+
+        for col, vals in cr_s_dict.items():
+            if col not in self.s_dict:
+                self.s_dict[col] = {}
+            for sp in cr_s_prevs:
+                if sp not in self.s_dict[col]:
+                    self.s_dict[col][sp] = []
+                self.s_dict[col][sp] = np.concatenate(
+                    [self.s_dict[col][sp], cr_s_dict[col][sp]]
+                )
+
+        for k, score in cr.fit_scores.items():
+            if k not in self.fit_scores:
+                self.fit_scores[k] = []
+            self.fit_scores[k].append(score)
+
+    def __add__(self, cr: CompReport):
+        self.add(cr)
+        return self
+
+    def __iadd__(self, cr: CompReport):
+        self.add(cr)
+        return self
+
+    def to_md(self, conf="default", metric="acc", estimators=[], stdev=False):
+        res = f"# {self.name}\n\n"
+        for cr in self.crs:
+            res += f"{cr.to_md(conf, metric=metric, estimators=estimators, stdev=stdev)}\n\n"
+
+        f_dict = {
+            c1: v
+            for ((c0, c1), v) in self._dict.items()
+            if c0 == metric and c1 in estimators
+        }
+        s_avg_dict = {
+            col: np.array([np.mean(vals[sp]) for sp in self.s_prevs])
+            for col, vals in {
+                c1: v
+                for ((c0, c1), v) in self.s_dict.items()
+                if c0 == metric and c1 in estimators
+            }.items()
+        }
+        avg_dict = {
+            col: np.array([np.mean(vals[bp]) for bp in self.prevs])
+            for col, vals in f_dict.items()
+        }
+        if stdev:
+            stdev_dict = {
+                col: np.array([np.std(vals[bp]) for bp in self.prevs])
+                for col, vals in f_dict.items()
+            }
+        all_dict = {
+            col: [np.mean(np.concatenate(list(vals.values())))]
+            for col, vals in f_dict.items()
+        }
+        df = pd.DataFrame(
+            {col: np.append(avg_dict[col], val) for col, val in all_dict.items()},
+            index=self.prevs.tolist() + ["tot"],
+            columns=all_dict.keys(),
+        )
+
+        res += "## avg\n"
+        res += df.to_html() + "\n\n"
+
+        delta_op = plot.plot_delta(
+            np.around([(1.0 - p, p) for p in self.prevs], decimals=2),
+            avg_dict,
+            metric=metric,
+            name=conf,
+            train_prev=None,
+            fit_scores={k: np.mean(vals) for k, vals in self.fit_scores.items()},
+        )
+        res += f"![plot_delta]({delta_op.relative_to(env.OUT_DIR).as_posix()})\n"
+
+        if stdev:
+            delta_stdev_op = plot.plot_delta(
+                np.around([(1.0 - p, p) for p in self.prevs], decimals=2),
+                avg_dict,
+                metric=metric,
+                name=conf,
+                train_prev=None,
+                fit_scores={k: np.mean(vals) for k, vals in self.fit_scores.items()},
+                stdevs=stdev_dict,
+            )
+            res += f"![plot_delta_stdev]({delta_stdev_op.relative_to(env.OUT_DIR).as_posix()})\n"
+
+        shift_op = plot.plot_shift(
+            np.around([(1.0 - p, p) for p in self.s_prevs], decimals=2),
+            s_avg_dict,
+            metric=metric,
+            name=conf,
+            train_prev=None,
+            fit_scores={k: np.mean(vals) for k, vals in self.fit_scores.items()},
+        )
+        res += f"![plot_shift]({shift_op.relative_to(env.OUT_DIR).as_posix()})\n"
 
         return res
 
     def __iter__(self):
-        return (er for er in self.ers)
+        return (cr for cr in self.crs)
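DatasetReport.add above folds each incoming CompReport into a running dictionary, concatenating the per-prevalence value arrays so the same keys accumulate measurements across configurations. A self-contained sketch of just that accumulation step, using plain dicts and numpy arrays rather than the actual classes (all names and values below are made up for illustration):

import numpy as np


def accumulate(store: dict, incoming: dict) -> dict:
    # store and incoming map column -> {prevalence: np.ndarray of values}
    for col, by_prev in incoming.items():
        dest = store.setdefault(col, {})
        for prev, vals in by_prev.items():
            dest[prev] = np.concatenate([dest.get(prev, np.array([])), vals])
    return store


store = {}
accumulate(store, {("acc", "bin_sld"): {0.1: np.array([0.90]), 0.5: np.array([0.80])}})
accumulate(store, {("acc", "bin_sld"): {0.1: np.array([0.95]), 0.5: np.array([0.82])}})
print(store[("acc", "bin_sld")][0.1])  # [0.9  0.95]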
@@ -1,49 +1,59 @@
-import os
-import shutil
-from pathlib import Path
+import logging as log
+import traceback
+from sys import platform
 
 import quacc.evaluation.comp as comp
 from quacc.dataset import Dataset
-from quacc.environ import env
+from quacc.environment import env
+from quacc.utils import create_dataser_dir
 
 
-def create_out_dir(dir_name):
-    base_out_dir = Path(env.OUT_DIR_NAME)
-    if not base_out_dir.exists():
-        os.mkdir(base_out_dir)
-    dir_path = base_out_dir / dir_name
-    env.OUT_DIR = dir_path
-    shutil.rmtree(dir_path, ignore_errors=True)
-    os.mkdir(dir_path)
-    plot_dir_path = dir_path / "plot"
-    env.PLOT_OUT_DIR = plot_dir_path
-    os.mkdir(plot_dir_path)
+def toast():
+    if platform == "win32":
+        import win11toast
+
+        win11toast.notify("Comp", "Completed Execution")
 
 
 def estimate_comparison():
-    for conf in env:
-        create_out_dir(conf)
+    for conf in env.get_confs():
+        create_dataser_dir(conf, update=env.DATASET_DIR_UPDATE)
         dataset = Dataset(
             env.DATASET_NAME,
             target=env.DATASET_TARGET,
             n_prevalences=env.DATASET_N_PREVS,
         )
-        output_path = env.OUT_DIR / f"{dataset.name}.md"
         try:
             dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
-            for m in env.METRICS:
-                output_path = env.OUT_DIR / f"{conf}_{m}.md"
-                with open(output_path, "w") as f:
-                    f.write(dr.to_md(m))
+            for plot_conf in env.get_plot_confs():
+                for m in env.METRICS:
+                    output_path = env.OUT_DIR / f"{plot_conf}_{m}.md"
+                    with open(output_path, "w") as f:
+                        f.write(
+                            dr.to_md(
+                                conf=plot_conf,
+                                metric=m,
+                                estimators=env.PLOT_ESTIMATORS,
+                                stdev=env.PLOT_STDEV,
+                            )
+                        )
         except Exception as e:
-            print(f"Configuration {conf} failed. {e}")
+            log.error(f"Configuration {conf} failed. Exception: {e}")
+            traceback(e)
 
     # print(df.to_latex(float_format="{:.4f}".format))
     # print(utils.avg_group_report(df).to_latex(float_format="{:.4f}".format))
 
 
 def main():
+    log.basicConfig(
+        filename="quacc.log",
+        filemode="a",
+        format="%(asctime)s| %(levelname)s: %(message)s",
+        datefmt="%d/%m/%y %H:%M:%S",
+    )
     estimate_comparison()
+    toast()
 
 
 if __name__ == "__main__":
quacc/plot.py (170)
@@ -1,54 +1,40 @@
 from pathlib import Path
 
+import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
+from cycler import cycler
 
-from quacc.environ import env
+from quacc.environment import env
+
+matplotlib.use("agg")
 
 
 def _get_markers(n: int):
-    ls = [
-        "o",
-        "v",
-        "x",
-        "+",
-        "s",
-        "D",
-        "p",
-        "h",
-        "*",
-        "^",
-        "1",
-        "2",
-        "3",
-        "4",
-        "X",
-        ">",
-        "<",
-        ".",
-        "P",
-        "d",
-    ]
+    ls = "ovx+sDph*^1234X><.Pd"
     if n > len(ls):
         ls = ls * (n / len(ls) + 1)
-    return ls[:n]
+    return list(ls)[:n]
 
 
 def plot_delta(
     base_prevs,
     dict_vals,
     *,
+    stdevs=None,
     pos_class=1,
     metric="acc",
     name="default",
     train_prev=None,
+    fit_scores=None,
     legend=True,
 ) -> Path:
+    _base_title = "delta_stdev" if stdevs is not None else "delta"
     if train_prev is not None:
         t_prev_pos = int(round(train_prev[pos_class] * 100))
-        title = f"delta_{name}_{t_prev_pos}_{metric}"
+        title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
     else:
-        title = f"delta_{name}_{metric}"
+        title = f"{_base_title}_{name}_avg_{metric}"
 
     fig, ax = plt.subplots()
     ax.set_aspect("auto")
@@ -58,24 +44,37 @@ def plot_delta(
     cm = plt.get_cmap("tab10")
     if NUM_COLORS > 10:
         cm = plt.get_cmap("tab20")
-    ax.set_prop_cycle(
-        color=[cm(1.0 * i / NUM_COLORS) for i in range(NUM_COLORS)],
-    )
+    cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
 
-    base_prevs = [bp[pos_class] for bp in base_prevs]
-    for method, deltas in dict_vals.items():
-        avg = np.array([np.mean(d, axis=-1) for d in deltas])
-        # std = np.array([np.std(d, axis=-1) for d in deltas])
+    base_prevs = base_prevs[:, pos_class]
+    for (method, deltas), _cy in zip(dict_vals.items(), cy):
         ax.plot(
             base_prevs,
-            avg,
+            deltas,
             label=method,
+            color=_cy["color"],
             linestyle="-",
             marker="o",
             markersize=3,
             zorder=2,
         )
-        # ax.fill_between(base_prevs, avg - std, avg + std, alpha=0.25)
+        if stdevs is not None:
+            stdev = stdevs[method]
+            ax.fill_between(
+                base_prevs,
+                deltas - stdev,
+                deltas + stdev,
+                color=_cy["color"],
+                alpha=0.25,
+            )
+        if fit_scores is not None and method in fit_scores:
+            ax.plot(
+                base_prevs,
+                np.repeat(fit_scores[method], base_prevs.shape[0]),
+                color=_cy["color"],
+                linestyle="--",
+                markersize=0,
+            )
 
     ax.set(xlabel="test prevalence", ylabel=metric, title=title)
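The rewritten plot_delta draws its colors from a cycler so that the main line, the stdev band, and the dashed fit-score reference of the same method share one color. A minimal matplotlib sketch of that pairing, with dummy data and none of the project's own plotting code (the filename and series names are illustrative):

import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
import numpy as np
from cycler import cycler

x = np.linspace(0.1, 0.9, 9)
series = {"method_a": np.sin(x), "method_b": np.cos(x)}

cm = plt.get_cmap("tab10")
cy = cycler(color=[cm(i) for i in range(len(series))])

fig, ax = plt.subplots()
for (name, y), c in zip(series.items(), cy):
    ax.plot(x, y, label=name, color=c["color"], marker="o", markersize=3)
    # shaded band and dashed reference reuse the same color as the main line
    ax.fill_between(x, y - 0.05, y + 0.05, color=c["color"], alpha=0.25)
    ax.plot(x, np.repeat(y.mean(), x.shape[0]), color=c["color"], linestyle="--")
ax.legend()
fig.savefig("delta_sketch.png")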
@@ -106,42 +105,62 @@ plot_diagonal(
     fig, ax = plt.subplots()
     ax.set_aspect("auto")
     ax.grid()
+    ax.set_aspect("equal")
 
     NUM_COLORS = len(dict_vals)
     cm = plt.get_cmap("tab10")
-    ax.set_prop_cycle(
-        marker=_get_markers(NUM_COLORS) * 2,
-        color=[cm(1.0 * i / NUM_COLORS) for i in range(NUM_COLORS)] * 2,
+    if NUM_COLORS > 10:
+        cm = plt.get_cmap("tab20")
+    cy = cycler(
+        color=[cm(i) for i in range(NUM_COLORS)],
+        marker=_get_markers(NUM_COLORS),
     )
 
     reference = np.array(reference)
     x_ticks = np.unique(reference)
     x_ticks.sort()
 
-    for _, deltas in dict_vals.items():
-        deltas = np.array(deltas)
+    for (_, deltas), _cy in zip(dict_vals.items(), cy):
         ax.plot(
             reference,
             deltas,
+            color=_cy["color"],
             linestyle="None",
+            marker=_cy["marker"],
             markersize=3,
             zorder=2,
+            alpha=0.25,
         )
 
-    for method, deltas in dict_vals.items():
-        deltas = np.array(deltas)
-        x_interp = x_ticks[[0, -1]]
-        y_interp = np.interp(x_interp, reference, deltas)
+    # ensure limits are equal for both axes
+    _alims = np.stack(((ax.get_xlim(), ax.get_ylim())), axis=-1)
+    _lims = np.array([f(ls) for f, ls in zip([np.min, np.max], _alims)])
+    ax.set(xlim=tuple(_lims), ylim=tuple(_lims))
+
+    for (method, deltas), _cy in zip(dict_vals.items(), cy):
+        slope, interc = np.polyfit(reference, deltas, 1)
+        y_lr = np.array([slope * x + interc for x in _lims])
         ax.plot(
-            x_interp,
-            y_interp,
+            _lims,
+            y_lr,
             label=method,
+            color=_cy["color"],
             linestyle="-",
             markersize="0",
             zorder=1,
         )
 
-    ax.set(xlabel="test prevalence", ylabel=metric, title=title)
+    # plot reference line
+    ax.plot(
+        _lims,
+        _lims,
+        color="black",
+        linestyle="--",
+        markersize=0,
+        zorder=1,
+    )
+
+    ax.set(xlabel=f"true {metric}", ylabel=f"estim. {metric}", title=title)
 
     if legend:
         ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
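In the diagonal plot above, each method's scatter is summarized by a first-degree np.polyfit line drawn across the shared axis limits. A small stand-alone sketch of that fit, using made-up true/estimated metric pairs:

import numpy as np

reference = np.array([0.60, 0.70, 0.80, 0.90])   # true metric values
estimates = np.array([0.58, 0.73, 0.79, 0.93])   # estimated metric values

slope, interc = np.polyfit(reference, estimates, 1)
lims = np.array([min(reference.min(), estimates.min()),
                 max(reference.max(), estimates.max())])
fit_line = slope * lims + interc

print(f"slope={slope:.3f}, intercept={interc:.3f}")
print("fit endpoints:", fit_line)  # endpoints of the regression line drawn on the plot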
@@ -151,62 +170,55 @@ def plot_diagonal(
 
 
 def plot_shift(
-    base_prevs,
-    dict_vals,
+    shift_prevs,
+    shift_dict,
     *,
     pos_class=1,
     metric="acc",
     name="default",
     train_prev=None,
+    fit_scores=None,
     legend=True,
 ) -> Path:
-    if train_prev is None:
-        raise AttributeError("train_prev cannot be None.")
-
-    train_prev = train_prev[pos_class]
-    t_prev_pos = int(round(train_prev * 100))
-    title = f"shift_{name}_{t_prev_pos}_{metric}"
+    if train_prev is not None:
+        t_prev_pos = int(round(train_prev[pos_class] * 100))
+        title = f"shift_{name}_{t_prev_pos}_{metric}"
+    else:
+        title = f"shift_{name}_avg_{metric}"
 
     fig, ax = plt.subplots()
     ax.set_aspect("auto")
     ax.grid()
 
-    NUM_COLORS = len(dict_vals)
+    NUM_COLORS = len(shift_dict)
     cm = plt.get_cmap("tab10")
     if NUM_COLORS > 10:
         cm = plt.get_cmap("tab20")
-    ax.set_prop_cycle(
-        color=[cm(1.0 * i / NUM_COLORS) for i in range(NUM_COLORS)],
-    )
-
-    base_prevs = np.around(
-        [abs(bp[pos_class] - train_prev) for bp in base_prevs], decimals=2
-    )
-    for method, deltas in dict_vals.items():
-        delta_bins = {}
-        for bp, delta in zip(base_prevs, deltas):
-            if bp not in delta_bins:
-                delta_bins[bp] = []
-            delta_bins[bp].append(delta)
-
-        bp_unique, delta_avg = zip(
-            *sorted(
-                {k: np.mean(v) for k, v in delta_bins.items()}.items(),
-                key=lambda db: db[0],
-            )
-        )
+    cy = cycler(color=[cm(i) for i in range(NUM_COLORS)])
 
+    shift_prevs = shift_prevs[:, pos_class]
+    for (method, shifts), _cy in zip(shift_dict.items(), cy):
         ax.plot(
-            bp_unique,
-            delta_avg,
+            shift_prevs,
+            shifts,
             label=method,
+            color=_cy["color"],
             linestyle="-",
             marker="o",
             markersize=3,
             zorder=2,
         )
+        if fit_scores is not None and method in fit_scores:
+            ax.plot(
+                shift_prevs,
+                np.repeat(fit_scores[method], shift_prevs.shape[0]),
+                color=_cy["color"],
+                linestyle="--",
+                markersize=0,
+            )
 
-    ax.set(xlabel="test prevalence", ylabel=metric, title=title)
+    ax.set(xlabel="dataset shift", ylabel=metric, title=title)
 
     if legend:
         ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
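The shift views above chart a metric against dataset shift, i.e. the absolute distance between each test prevalence and the training prevalence of the positive class, averaging all samples that fall in the same shift bin. A minimal numpy sketch of that bucketing, with made-up prevalence and accuracy values:

import numpy as np

train_prev = 0.3                                    # training positive-class prevalence
prevs = np.array([0.1, 0.2, 0.3, 0.5, 0.7])         # test positive-class prevalences
values = np.array([0.92, 0.90, 0.95, 0.88, 0.80])   # e.g. accuracy estimates per sample

shift = np.around(np.absolute(prevs - train_prev), decimals=2)
buckets = {}
for s, v in zip(shift, values):
    buckets.setdefault(s, []).append(v)

# average per shift bin, sorted by increasing shift
for s in sorted(buckets):
    print(s, np.mean(buckets[s]))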
@@ -1,7 +1,12 @@
 import functools
+import os
+import shutil
+from pathlib import Path
 
 import pandas as pd
 
+from quacc.environment import env
+
 
 def combine_dataframes(dfs, df_index=[]) -> pd.DataFrame:
     if len(dfs) < 1:
@@ -32,3 +37,23 @@ def avg_group_report(df: pd.DataFrame) -> pd.DataFrame:
 
 def fmt_line_md(s):
     return f"> {s} \n"
+
+
+def create_dataser_dir(dir_name, update=False):
+    base_out_dir = Path(env.OUT_DIR_NAME)
+    if not base_out_dir.exists():
+        os.mkdir(base_out_dir)
+
+    dataset_dir = base_out_dir / dir_name
+    env.OUT_DIR = dataset_dir
+    if update:
+        if not dataset_dir.exists():
+            os.mkdir(dataset_dir)
+    else:
+        shutil.rmtree(dataset_dir, ignore_errors=True)
+        os.mkdir(dataset_dir)
+
+    plot_dir_path = dataset_dir / "plot"
+    env.PLOT_OUT_DIR = plot_dir_path
+    if not plot_dir_path.exists():
+        os.mkdir(plot_dir_path)
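create_dataser_dir above either reuses or recreates the per-dataset output directory depending on the update flag. A stand-alone sketch of the same create-or-wipe logic, using only pathlib and shutil (paths and names below are illustrative, not the project's configuration):

import shutil
from pathlib import Path


def prepare_dir(base: str, name: str, update: bool = False) -> Path:
    base_dir = Path(base)
    base_dir.mkdir(exist_ok=True)

    target = base_dir / name
    if update:
        # keep previous contents, only create the directory if missing
        target.mkdir(exist_ok=True)
    else:
        # start from a clean directory
        shutil.rmtree(target, ignore_errors=True)
        target.mkdir()

    (target / "plot").mkdir(exist_ok=True)
    return target


print(prepare_dir("out", "example_dataset", update=True))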