This commit is contained in:
Lorenzo Volpi 2024-01-31 18:06:58 +01:00
parent 1e7161e681
commit 531d22573b
9 changed files with 77 additions and 14 deletions

View File

@ -71,14 +71,14 @@ test_conf: &test_conf
main: main:
confs: &main_confs confs: &main_confs
- DATASET_NAME: imdb
- DATASET_NAME: rcv1 - DATASET_NAME: rcv1
DATASET_TARGET: CCAT DATASET_TARGET: CCAT
other_confs:
- DATASET_NAME: imdb
- DATASET_NAME: rcv1 - DATASET_NAME: rcv1
DATASET_TARGET: GCAT DATASET_TARGET: GCAT
- DATASET_NAME: rcv1 - DATASET_NAME: rcv1
DATASET_TARGET: MCAT DATASET_TARGET: MCAT
other_confs:
sld_lr_conf: &sld_lr_conf sld_lr_conf: &sld_lr_conf
@ -348,9 +348,7 @@ baselines_conf: &baselines_conf
COMP_ESTIMATORS: COMP_ESTIMATORS:
- doc - doc
- atc_mc - atc_mc
- mandoline - naive
- rca
- rca_star
N_JOBS: -2 N_JOBS: -2
confs: *main_confs confs: *main_confs
@ -406,4 +404,4 @@ timing_conf: &timing_conf
confs: *main_confs confs: *main_confs
exec: *kde_lr_gs_conf exec: *baselines_conf

View File

@ -1,7 +1,9 @@
#!/bin/bash #!/bin/bash
CMD="cp" CMD="scp"
DEST="~/tesi_docker/" DEST="andreaesuli@edge-nd1.isti.cnr.it:~/raid/lorenzo/"
# CMD="cp"
# DEST="~/tesi_docker/"
bash -c "${CMD} -r quacc ${DEST}" bash -c "${CMD} -r quacc ${DEST}"
bash -c "${CMD} -r baselines ${DEST}" bash -c "${CMD} -r baselines ${DEST}"

2
log
View File

@ -3,6 +3,8 @@
if [[ "${1}" == "r" ]]; then if [[ "${1}" == "r" ]]; then
scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
elif [[ "${1}" == "d" ]]; then
ssh andreaesuli@edge-nd1.isti.cnr.it tail -n 500 -f /home/andreaesuli/raid/lorenzo/quacc.log | bat -P --language=log
else else
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
fi fi

View File

@ -126,7 +126,9 @@ class DatasetProvider:
# provare min_df=5 # provare min_df=5
def __imdb(self, **kwargs): def __imdb(self, **kwargs):
return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test return qp.datasets.fetch_reviews(
"imdb", data_home="./quapy_data", tfidf=True, min_df=3
).train_test
def __rcv1(self, target, **kwargs): def __rcv1(self, target, **kwargs):
n_train = 23149 n_train = 23149
@ -135,7 +137,7 @@ class DatasetProvider:
if target is None or target not in available_targets: if target is None or target not in available_targets:
raise ValueError(f"Invalid target {target}") raise ValueError(f"Invalid target {target}")
dataset = fetch_rcv1() dataset = fetch_rcv1(data_home="./scikit_learn_data")
target_index = np.where(dataset.target_names == target)[0] target_index = np.where(dataset.target_names == target)[0]
all_train_d = dataset.data[:n_train, :] all_train_d = dataset.data[:n_train, :]
test_d = dataset.data[n_train:, :] test_d = dataset.data[n_train:, :]

View File

@ -68,6 +68,38 @@ def kfcv(
return report return report
@baseline
def naive(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Naive baseline: compare validation scores with each test sample's scores.

    The classifier is scored once on ``validation`` (accuracy and F1). Every
    sample produced by ``protocol()`` is then scored the same way, and the
    absolute differences from the validation scores are stored alongside the
    sample's own scores.

    Args:
        c_model: classifier exposing the method named by ``predict_method``.
        validation: labelled collection; ``X``, ``y`` and ``n_classes`` are read.
        protocol: callable whose result iterates over labelled test samples.
        predict_method: name of the prediction method looked up on ``c_model``.

    Returns:
        The ``EvaluationReport`` named ``"naive"``, one row per test sample.
    """
    predict = getattr(c_model, predict_method)

    # F1 averaging mode: "binary" for two classes, "macro" otherwise.
    if validation.n_classes == 2:
        averaging = "binary"
    else:
        averaging = "macro"

    def _scores(sample):
        # Accuracy and F1 of the classifier's predictions on one collection.
        predicted = predict(sample.X)
        accuracy = metrics.accuracy_score(sample.y, predicted)
        f1 = metrics.f1_score(sample.y, predicted, average=averaging)
        return accuracy, f1

    # Validation scores are computed once and reused for every test sample.
    reference_acc, reference_f1 = _scores(validation)

    report = EvaluationReport(name="naive")
    for sample in protocol():
        sample_acc, sample_f1 = _scores(sample)
        # NOTE(review): acc_score/f1_score hold the scores measured on the
        # test sample itself, not the validation-based values — confirm that
        # this is what report consumers expect under these keys.
        report.append_row(
            sample.prevalence(),
            acc_score=sample_acc,
            f1_score=sample_f1,
            acc=abs(reference_acc - sample_acc),
            f1=abs(reference_f1 - sample_f1),
        )

    return report
@baseline @baseline
def ref( def ref(
c_model: BaseEstimator, c_model: BaseEstimator,
@ -556,4 +588,3 @@ def kdex2(
report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc) report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
return report return report

View File

@ -380,9 +380,9 @@ __kde_lr_set = [
M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ), M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True), M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
# gs kde # gs kde
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ), G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ), G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True), G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
E("kde_lr_gs"), E("kde_lr_gs"),
] ]
@ -458,6 +458,7 @@ __methods_set = (
+ __kde_lr_set + __kde_lr_set
+ __dense_kde_lr_set + __dense_kde_lr_set
+ __dense_kde_rbf_set + __dense_kde_rbf_set
+ [E("QuAcc")]
) )
_methods = {m.name: m for m in __methods_set} _methods = {m.name: m for m in __methods_set}

View File

@ -140,6 +140,14 @@ class CompReport:
"mul_kde_lr_gs", "mul_kde_lr_gs",
"m3w_kde_lr_gs", "m3w_kde_lr_gs",
], ],
"QuAcc": [
"bin_sld_lr_gs",
"mul_sld_lr_gs",
"m3w_sld_lr_gs",
"bin_kde_lr_gs",
"mul_kde_lr_gs",
"m3w_kde_lr_gs",
],
} }
for name, methods in _mapping.items(): for name, methods in _mapping.items():

15
rates.md Normal file
View File

@ -0,0 +1,15 @@
# Additional covariates percentage
Rate at which additional covariates ("av"), recalibration ("recalib") and the
"balanced" class_weight ("rebalance") were used during grid search:
| method | av % | recalib % | rebalance % |
| --------------: | :----: | :-------: | :---------: |
| imdb_sld_lr | 81.49% | 77.78% | 59.26% |
| imdb_kde_lr | 71.43% | NA | 88.18% |
| rcv1_CCAT_sld_lr| 62.97% | 70.38% | 77.78% |
| rcv1_CCAT_kde_lr| 78.06% | NA | 84.82% |
| rcv1_GCAT_sld_lr| 76.93% | 61.54% | 65.39% |
| rcv1_GCAT_kde_lr| 71.36% | NA | 78.65% |
| rcv1_MCAT_sld_lr| 62.97% | 48.15% | 74.08% |
| rcv1_MCAT_kde_lr| 71.03% | NA | 68.70% |

4
run.py
View File

@ -15,3 +15,7 @@ def run():
run_local() run_local()
elif args.remote: elif args.remote:
run_remote(detatch=args.detatch) run_remote(detatch=args.detatch)
# Call run() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    run()