update
This commit is contained in:
parent
1e7161e681
commit
531d22573b
10
conf.yaml
10
conf.yaml
|
@ -71,14 +71,14 @@ test_conf: &test_conf
|
||||||
|
|
||||||
main:
|
main:
|
||||||
confs: &main_confs
|
confs: &main_confs
|
||||||
|
- DATASET_NAME: imdb
|
||||||
- DATASET_NAME: rcv1
|
- DATASET_NAME: rcv1
|
||||||
DATASET_TARGET: CCAT
|
DATASET_TARGET: CCAT
|
||||||
other_confs:
|
|
||||||
- DATASET_NAME: imdb
|
|
||||||
- DATASET_NAME: rcv1
|
- DATASET_NAME: rcv1
|
||||||
DATASET_TARGET: GCAT
|
DATASET_TARGET: GCAT
|
||||||
- DATASET_NAME: rcv1
|
- DATASET_NAME: rcv1
|
||||||
DATASET_TARGET: MCAT
|
DATASET_TARGET: MCAT
|
||||||
|
other_confs:
|
||||||
|
|
||||||
sld_lr_conf: &sld_lr_conf
|
sld_lr_conf: &sld_lr_conf
|
||||||
|
|
||||||
|
@ -348,9 +348,7 @@ baselines_conf: &baselines_conf
|
||||||
COMP_ESTIMATORS:
|
COMP_ESTIMATORS:
|
||||||
- doc
|
- doc
|
||||||
- atc_mc
|
- atc_mc
|
||||||
- mandoline
|
- naive
|
||||||
- rca
|
|
||||||
- rca_star
|
|
||||||
N_JOBS: -2
|
N_JOBS: -2
|
||||||
|
|
||||||
confs: *main_confs
|
confs: *main_confs
|
||||||
|
@ -406,4 +404,4 @@ timing_conf: &timing_conf
|
||||||
|
|
||||||
confs: *main_confs
|
confs: *main_confs
|
||||||
|
|
||||||
exec: *kde_lr_gs_conf
|
exec: *baselines_conf
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CMD="cp"
|
CMD="scp"
|
||||||
DEST="~/tesi_docker/"
|
DEST="andreaesuli@edge-nd1.isti.cnr.it:~/raid/lorenzo/"
|
||||||
|
# CMD="cp"
|
||||||
|
# DEST="~/tesi_docker/"
|
||||||
|
|
||||||
bash -c "${CMD} -r quacc ${DEST}"
|
bash -c "${CMD} -r quacc ${DEST}"
|
||||||
bash -c "${CMD} -r baselines ${DEST}"
|
bash -c "${CMD} -r baselines ${DEST}"
|
||||||
|
|
2
log
2
log
|
@ -3,6 +3,8 @@
|
||||||
if [[ "${1}" == "r" ]]; then
|
if [[ "${1}" == "r" ]]; then
|
||||||
scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
|
scp volpi@ilona.isti.cnr.it:~/tesi/quacc.log ~/tesi/remote.log &>/dev/null
|
||||||
ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
|
ssh volpi@ilona.isti.cnr.it tail -n 500 -f /home/volpi/tesi/quacc.log | bat -P --language=log
|
||||||
|
elif [[ "${1}" == "d" ]]; then
|
||||||
|
ssh andreaesuli@edge-nd1.isti.cnr.it tail -n 500 -f /home/andreaesuli/raid/lorenzo/quacc.log | bat -P --language=log
|
||||||
else
|
else
|
||||||
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
|
tail -n 500 -f /home/lorev/tesi/quacc.log | bat --paging=never --language log
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -126,7 +126,9 @@ class DatasetProvider:
|
||||||
|
|
||||||
# TODO: try min_df=5
|
# TODO: try min_df=5
|
||||||
def __imdb(self, **kwargs):
|
def __imdb(self, **kwargs):
|
||||||
return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test
|
return qp.datasets.fetch_reviews(
|
||||||
|
"imdb", data_home="./quapy_data", tfidf=True, min_df=3
|
||||||
|
).train_test
|
||||||
|
|
||||||
def __rcv1(self, target, **kwargs):
|
def __rcv1(self, target, **kwargs):
|
||||||
n_train = 23149
|
n_train = 23149
|
||||||
|
@ -135,7 +137,7 @@ class DatasetProvider:
|
||||||
if target is None or target not in available_targets:
|
if target is None or target not in available_targets:
|
||||||
raise ValueError(f"Invalid target {target}")
|
raise ValueError(f"Invalid target {target}")
|
||||||
|
|
||||||
dataset = fetch_rcv1()
|
dataset = fetch_rcv1(data_home="./scikit_learn_data")
|
||||||
target_index = np.where(dataset.target_names == target)[0]
|
target_index = np.where(dataset.target_names == target)[0]
|
||||||
all_train_d = dataset.data[:n_train, :]
|
all_train_d = dataset.data[:n_train, :]
|
||||||
test_d = dataset.data[n_train:, :]
|
test_d = dataset.data[n_train:, :]
|
||||||
|
|
|
@ -68,6 +68,38 @@ def kfcv(
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
||||||
|
@baseline
def naive(
    c_model: BaseEstimator,
    validation: LabelledCollection,
    protocol: AbstractStochasticSeededProtocol,
    predict_method="predict",
):
    """Naive baseline: estimate test-sample scores with the validation scores.

    The classifier is scored once on ``validation``; those scores are then
    used as the estimate for every sample produced by ``protocol``, and the
    estimation error is the absolute gap to the scores actually measured on
    that sample.

    Args:
        c_model: fitted classifier; the method named by ``predict_method``
            is looked up on it and called on ``validation.X`` / ``test.X``.
        validation: labelled data providing ``X``, ``y`` and ``n_classes``.
        protocol: callable yielding test samples that expose ``X``, ``y``
            and ``prevalence()``.
        predict_method: name of the prediction method to fetch from
            ``c_model`` (defaults to ``"predict"``).

    Returns:
        An ``EvaluationReport`` named ``"naive"`` with one row per sample:
        ``acc_score`` / ``f1_score`` hold the estimated scores (i.e. the
        validation scores) and ``acc`` / ``f1`` the absolute estimation
        errors.
    """
    c_model_predict = getattr(c_model, predict_method)
    # F1 is averaged as "binary" for two classes and "macro" otherwise.
    f1_average = "binary" if validation.n_classes == 2 else "macro"

    # The estimate does not depend on the test sample: compute it once.
    val_preds = c_model_predict(validation.X)
    val_acc = metrics.accuracy_score(validation.y, val_preds)
    val_f1 = metrics.f1_score(validation.y, val_preds, average=f1_average)

    report = EvaluationReport(name="naive")
    for test in protocol():
        test_preds = c_model_predict(test.X)
        true_acc = metrics.accuracy_score(test.y, test_preds)
        true_f1 = metrics.f1_score(test.y, test_preds, average=f1_average)
        # NOTE(review): acc_score / f1_score now carry the *estimated*
        # scores, matching kdex2 which stores its estimate under acc_score;
        # previously the true test scores were stored here. Confirm against
        # the EvaluationReport column contract.
        report.append_row(
            test.prevalence(),
            acc_score=val_acc,
            f1_score=val_f1,
            acc=abs(val_acc - true_acc),
            f1=abs(val_f1 - true_f1),
        )

    return report
|
||||||
|
|
||||||
|
|
||||||
@baseline
|
@baseline
|
||||||
def ref(
|
def ref(
|
||||||
c_model: BaseEstimator,
|
c_model: BaseEstimator,
|
||||||
|
@ -556,4 +588,3 @@ def kdex2(
|
||||||
report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
|
report.append_row(test.prevalence(), acc=meta_score, acc_score=estim_acc)
|
||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|
|
@ -380,9 +380,9 @@ __kde_lr_set = [
|
||||||
M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
|
M("mul_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], ),
|
||||||
M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
|
M("m3w_kde_lr_a", __kde_lr(), "mul", conf=["max_conf", "entropy", "isoft"], cf=True),
|
||||||
# gs kde
|
# gs kde
|
||||||
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="spider" ),
|
G("bin_kde_lr_gs", __kde_lr(), "bin", pg="kde_lr", search="grid" ),
|
||||||
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider" ),
|
G("mul_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid" ),
|
||||||
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="spider", cf=True),
|
G("m3w_kde_lr_gs", __kde_lr(), "mul", pg="kde_lr", search="grid", cf=True),
|
||||||
E("kde_lr_gs"),
|
E("kde_lr_gs"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -458,6 +458,7 @@ __methods_set = (
|
||||||
+ __kde_lr_set
|
+ __kde_lr_set
|
||||||
+ __dense_kde_lr_set
|
+ __dense_kde_lr_set
|
||||||
+ __dense_kde_rbf_set
|
+ __dense_kde_rbf_set
|
||||||
|
+ [E("QuAcc")]
|
||||||
)
|
)
|
||||||
|
|
||||||
_methods = {m.name: m for m in __methods_set}
|
_methods = {m.name: m for m in __methods_set}
|
||||||
|
|
|
@ -140,6 +140,14 @@ class CompReport:
|
||||||
"mul_kde_lr_gs",
|
"mul_kde_lr_gs",
|
||||||
"m3w_kde_lr_gs",
|
"m3w_kde_lr_gs",
|
||||||
],
|
],
|
||||||
|
"QuAcc": [
|
||||||
|
"bin_sld_lr_gs",
|
||||||
|
"mul_sld_lr_gs",
|
||||||
|
"m3w_sld_lr_gs",
|
||||||
|
"bin_kde_lr_gs",
|
||||||
|
"mul_kde_lr_gs",
|
||||||
|
"m3w_kde_lr_gs",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, methods in _mapping.items():
|
for name, methods in _mapping.items():
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Additional covariates percentage
|
||||||
|
|
||||||
|
Rate of usage of additional covariates (av), recalibration (recalib) and "balanced" class_weight (rebalance)
|
||||||
|
during grid search:
|
||||||
|
|
||||||
|
| method | av % | recalib % | rebalance % |
|
||||||
|
| --------------: | :----: | :-------: | :---------: |
|
||||||
|
| imdb_sld_lr | 81.49% | 77.78% | 59.26% |
|
||||||
|
| imdb_kde_lr | 71.43% | NA | 88.18% |
|
||||||
|
| rcv1_CCAT_sld_lr| 62.97% | 70.38% | 77.78% |
|
||||||
|
| rcv1_CCAT_kde_lr| 78.06% | NA | 84.82% |
|
||||||
|
| rcv1_GCAT_sld_lr| 76.93% | 61.54% | 65.39% |
|
||||||
|
| rcv1_GCAT_kde_lr| 71.36% | NA | 78.65% |
|
||||||
|
| rcv1_MCAT_sld_lr| 62.97% | 48.15% | 74.08% |
|
||||||
|
| rcv1_MCAT_kde_lr| 71.03% | NA | 68.70% |
|
Loading…
Reference in New Issue