logger implemented
This commit is contained in:
parent
568f200e3f
commit
345807977c
|
@ -12,4 +12,6 @@ elsahar19_rca/__pycache__/*
|
|||
*.coverage
|
||||
.coverage
|
||||
scp_sync.py
|
||||
out/*
|
||||
out/*
|
||||
output/*
|
||||
*.log
|
|
@ -7,6 +7,7 @@ debug_conf: &debug_conf
|
|||
datasets:
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: CCAT
|
||||
- DATASET_NAME: imdb
|
||||
|
||||
plot_confs:
|
||||
debug:
|
||||
|
@ -49,14 +50,14 @@ main_conf: &main_conf
|
|||
DATASET_N_PREVS: 9
|
||||
|
||||
datasets:
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: CCAT
|
||||
- DATASET_NAME: imdb
|
||||
datasets_bck:
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: GCAT
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: MCAT
|
||||
- DATASET_NAME: imdb
|
||||
- DATASET_NAME: rcv1
|
||||
DATASET_TARGET: CCAT
|
||||
|
||||
plot_confs:
|
||||
gs_vs_atc:
|
||||
|
@ -99,4 +100,4 @@ main_conf: &main_conf
|
|||
- atc_ne
|
||||
- doc_feat
|
||||
|
||||
exec: *debug_conf
|
||||
exec: *main_conf
|
26
quacc.log
26
quacc.log
|
@ -1,26 +0,0 @@
|
|||
dataset rcv1_CCAT
|
||||
28/10/23 00:45:46| INFO: dataset rcv1_CCAT
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT started
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:45:50| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:45:51| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
|
||||
28/10/23 00:45:52| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:45:52| INFO: Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.8041s
|
||||
Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
|
||||
28/10/23 00:45:52| ERROR: Configuration rcv1_CCAT_1prevs failed. Exception: too many indices for array: array is 1-dimensional, but 2 were indexed
|
||||
dataset rcv1_CCAT
|
||||
28/10/23 00:47:52| INFO: dataset rcv1_CCAT
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT started
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:47:56| INFO: Dataset sample 0.50 of dataset rcv1_CCAT started
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
28/10/23 00:47:57| ERROR: Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT failed. Exception: Queue objects should only be shared between processes through inheritance
|
||||
Dataset sample 0.50 of dataset rcv1_CCAT finished [took 1.7186s
|
|
@ -34,9 +34,17 @@ class DatasetSample:
|
|||
|
||||
|
||||
class Dataset:
|
||||
def __init__(self, name, n_prevalences=9, target=None):
|
||||
def __init__(self, name, n_prevalences=9, prevs=None, target=None):
|
||||
self._name = name
|
||||
self._target = target
|
||||
|
||||
self.prevs = None
|
||||
if prevs is not None:
|
||||
prevs = np.unique([p for p in prevs if p > 0.0 and p < 1.0])
|
||||
if prevs.shape[0] > 0:
|
||||
self.prevs = np.sort(prevs)
|
||||
self.n_prevs = self.prevs.shape[0]
|
||||
|
||||
self.n_prevs = n_prevalences
|
||||
|
||||
def __spambase(self):
|
||||
|
@ -92,10 +100,14 @@ class Dataset:
|
|||
)
|
||||
|
||||
# sample prevalences
|
||||
prevalences = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
|
||||
at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevalences)
|
||||
if self.prevs is not None:
|
||||
prevs = self.prevs
|
||||
else:
|
||||
prevs = np.linspace(0.0, 1.0, num=self.n_prevs + 1, endpoint=False)[1:]
|
||||
|
||||
at_size = min(math.floor(len(all_train) * 0.5 / p) for p in prevs)
|
||||
datasets = []
|
||||
for p in prevalences:
|
||||
for p in prevs:
|
||||
all_train_sampled = all_train.sampling(at_size, p, random_state=0)
|
||||
train, validation = all_train_sampled.split_stratified(
|
||||
train_prop=TRAIN_VAL_PROP, random_state=0
|
||||
|
|
|
@ -8,6 +8,7 @@ defalut_env = {
|
|||
"PLOT_ESTIMATORS": [],
|
||||
"PLOT_STDEV": False,
|
||||
"DATASET_N_PREVS": 9,
|
||||
"DATASET_PREVS": None,
|
||||
"OUT_DIR_NAME": "output",
|
||||
"OUT_DIR": None,
|
||||
"PLOT_DIR_NAME": "plot",
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
import multiprocessing
|
||||
import time
|
||||
import traceback
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
import quapy as qp
|
||||
from quapy.protocol import APP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.environment import env
|
||||
from quacc.evaluation import baseline, method
|
||||
from quacc.evaluation.report import CompReport, DatasetReport, EvaluationReport
|
||||
from quacc.logger import Logger, SubLogger
|
||||
|
||||
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
|
||||
from quacc.evaluation.worker import estimate_worker
|
||||
from quacc.logging import Logger
|
||||
|
||||
pd.set_option("display.float_format", "{:.4f}".format)
|
||||
qp.environ["SAMPLE_SIZE"] = env.SAMPLE_SIZE
|
||||
log = Logger.logger()
|
||||
|
||||
|
||||
class CompEstimator:
|
||||
|
@ -40,45 +40,9 @@ class CompEstimator:
|
|||
CE = CompEstimator
|
||||
|
||||
|
||||
def fit_and_estimate(_estimate, train, validation, test, _env=None, q=None):
|
||||
_env = env if _env is None else _env
|
||||
SubLogger.setup(q)
|
||||
log = SubLogger.logger()
|
||||
|
||||
model = LogisticRegression()
|
||||
|
||||
model.fit(*train.Xy)
|
||||
protocol = APP(
|
||||
test,
|
||||
n_prevalences=_env.PROTOCOL_N_PREVS,
|
||||
repeats=_env.PROTOCOL_REPEATS,
|
||||
return_type="labelled_collection",
|
||||
)
|
||||
start = time.time()
|
||||
try:
|
||||
result = _estimate(model, validation, protocol)
|
||||
except Exception as e:
|
||||
log.error(f"Method {_estimate.__name__} failed. Exception: {e}")
|
||||
return {
|
||||
"name": _estimate.__name__,
|
||||
"result": None,
|
||||
"time": 0,
|
||||
}
|
||||
|
||||
end = time.time()
|
||||
log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")
|
||||
|
||||
return {
|
||||
"name": _estimate.__name__,
|
||||
"result": result,
|
||||
"time": end - start,
|
||||
}
|
||||
|
||||
|
||||
def evaluate_comparison(
|
||||
dataset: Dataset, estimators=["OUR_BIN_SLD", "OUR_MUL_SLD"]
|
||||
) -> EvaluationReport:
|
||||
log = Logger.logger()
|
||||
# with multiprocessing.Pool(1) as pool:
|
||||
with multiprocessing.Pool(len(estimators)) as pool:
|
||||
dr = DatasetReport(dataset.name)
|
||||
|
@ -90,9 +54,7 @@ def evaluate_comparison(
|
|||
tstart = time.time()
|
||||
tasks = [(estim, d.train, d.validation, d.test) for estim in CE[estimators]]
|
||||
results = [
|
||||
pool.apply_async(
|
||||
fit_and_estimate, t, {"_env": env, "q": Logger.queue()}
|
||||
)
|
||||
pool.apply_async(estimate_worker, t, {"_env": env, "q": Logger.queue()})
|
||||
for t in tasks
|
||||
]
|
||||
|
||||
|
@ -103,7 +65,7 @@ def evaluate_comparison(
|
|||
if r["result"] is not None:
|
||||
results_got.append(r)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
log.warning(
|
||||
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
|
||||
)
|
||||
|
||||
|
@ -111,14 +73,21 @@ def evaluate_comparison(
|
|||
times = {r["name"]: r["time"] for r in results_got}
|
||||
times["tot"] = tend - tstart
|
||||
log.info(
|
||||
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s"
|
||||
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} finished [took {times['tot']:.4f}s]"
|
||||
)
|
||||
dr += CompReport(
|
||||
[r["result"] for r in results_got],
|
||||
name=dataset.name,
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
times=times,
|
||||
)
|
||||
|
||||
try:
|
||||
cr = CompReport(
|
||||
[r["result"] for r in results_got],
|
||||
name=dataset.name,
|
||||
train_prev=d.train_prev,
|
||||
valid_prev=d.validation_prev,
|
||||
times=times,
|
||||
)
|
||||
except Exception as e:
|
||||
log.warning(
|
||||
f"Dataset sample {d.train_prev[1]:.2f} of dataset {dataset.name} failed. Exception: {e}"
|
||||
)
|
||||
traceback(e)
|
||||
cr = None
|
||||
dr += cr
|
||||
return dr
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
import time
|
||||
|
||||
import quapy as qp
|
||||
from quapy.protocol import APP
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from quacc.logging import SubLogger
|
||||
|
||||
|
||||
def estimate_worker(_estimate, train, validation, test, _env=None, q=None):
    """Fit a classifier and run one estimation method over a test protocol.

    Args:
        _estimate: callable invoked as ``_estimate(model, validation, protocol)``;
            its ``__name__`` labels the returned record and the log lines.
        train: collection exposing ``Xy``, used to fit the classifier.
        validation: collection forwarded unchanged to ``_estimate``.
        test: collection the ``APP`` protocol is built on.
        _env: environment providing ``SAMPLE_SIZE``, ``PROTOCOL_N_PREVS`` and
            ``PROTOCOL_REPEATS``. When ``None`` the package-wide environment
            is used.
        q: queue handed to ``SubLogger.setup`` so that records emitted here
            reach the listener.

    Returns:
        dict with keys ``"name"``, ``"result"`` and ``"time"``. If
        ``_estimate`` raises, ``"result"`` is ``None`` and ``"time"`` is ``0``.
    """
    if _env is None:
        # The default used to be dereferenced directly, failing with
        # AttributeError; fall back to the shared environment instead.
        from quacc.environment import env as _env

    qp.environ["SAMPLE_SIZE"] = _env.SAMPLE_SIZE
    SubLogger.setup(q)
    log = SubLogger.logger()

    model = LogisticRegression()

    # Fitting and protocol construction sit outside the try block on purpose:
    # only failures of the estimation method itself are downgraded to a
    # warning, anything else propagates to the caller.
    model.fit(*train.Xy)
    protocol = APP(
        test,
        n_prevalences=_env.PROTOCOL_N_PREVS,
        repeats=_env.PROTOCOL_REPEATS,
        return_type="labelled_collection",
    )
    start = time.time()
    try:
        result = _estimate(model, validation, protocol)
    except Exception as e:
        log.warning(f"Method {_estimate.__name__} failed. Exception: {e}")
        return {
            "name": _estimate.__name__,
            "result": None,
            "time": 0,
        }

    end = time.time()
    log.info(f"{_estimate.__name__} finished [took {end-start:.4f}s]")

    return {
        "name": _estimate.__name__,
        "result": result,
        "time": end - start,
    }
|
|
@ -7,6 +7,7 @@ import threading
|
|||
class Logger:
|
||||
__logger_file = "quacc.log"
|
||||
__logger_name = "queue_logger"
|
||||
__manager = None
|
||||
__queue = None
|
||||
__thread = None
|
||||
__setup = False
|
||||
|
@ -17,7 +18,7 @@ class Logger:
|
|||
record = q.get()
|
||||
if record is None:
|
||||
break
|
||||
root = logging.getLogger()
|
||||
root = logging.getLogger("listener")
|
||||
root.handle(record)
|
||||
|
||||
@classmethod
|
||||
|
@ -26,13 +27,19 @@ class Logger:
|
|||
return
|
||||
|
||||
# setup root
|
||||
root = logging.getLogger()
|
||||
root = logging.getLogger("listener")
|
||||
root.setLevel(logging.DEBUG)
|
||||
rh = logging.FileHandler(cls.__logger_file, mode="a")
|
||||
rh.setLevel(logging.DEBUG)
|
||||
root.addHandler(rh)
|
||||
root.info("-" * 100)
|
||||
|
||||
# setup logger
|
||||
if cls.__manager is None:
|
||||
cls.__manager = multiprocessing.Manager()
|
||||
|
||||
if cls.__queue is None:
|
||||
cls.__queue = multiprocessing.Queue()
|
||||
cls.__queue = cls.__manager.Queue()
|
||||
|
||||
logger = logging.getLogger(cls.__logger_name)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
@ -70,9 +77,11 @@ class Logger:
|
|||
return logging.getLogger(cls.__logger_name)
|
||||
|
||||
@classmethod
|
||||
def join_listener(cls):
|
||||
def close(cls):
|
||||
if cls.__setup and cls.__thread is not None:
|
||||
cls.__queue.put(None)
|
||||
cls.__thread.join()
|
||||
# cls.__manager.close()
|
||||
|
||||
|
||||
class SubLogger:
|
||||
|
@ -88,7 +97,9 @@ class SubLogger:
|
|||
|
||||
# setup root
|
||||
root = logging.getLogger()
|
||||
root.setLevel(logging.DEBUG)
|
||||
rh = logging.handlers.QueueHandler(q)
|
||||
rh.setLevel(logging.DEBUG)
|
||||
rh.setFormatter(
|
||||
logging.Formatter(
|
||||
fmt="%(asctime)s| %(levelname)s: %(message)s",
|
|
@ -4,7 +4,7 @@ from sys import platform
|
|||
import quacc.evaluation.comp as comp
|
||||
from quacc.dataset import Dataset
|
||||
from quacc.environment import env
|
||||
from quacc.logger import Logger
|
||||
from quacc.logging import Logger
|
||||
from quacc.utils import create_dataser_dir
|
||||
|
||||
log = Logger.logger()
|
||||
|
@ -24,6 +24,7 @@ def estimate_comparison():
|
|||
env.DATASET_NAME,
|
||||
target=env.DATASET_TARGET,
|
||||
n_prevalences=env.DATASET_N_PREVS,
|
||||
prevs=env.DATASET_PREVS,
|
||||
)
|
||||
try:
|
||||
dr = comp.evaluate_comparison(dataset, estimators=env.COMP_ESTIMATORS)
|
||||
|
@ -48,9 +49,14 @@ def estimate_comparison():
|
|||
|
||||
|
||||
def main():
    """Run the estimator comparison, then notify and shut down logging.

    Any exception raised by ``estimate_comparison`` is logged and its
    traceback printed, so that ``toast()`` and ``Logger.close()`` still run.
    """
    # Local import: `traceback` is a module and cannot be called directly;
    # importing here keeps this function independent of the file's imports.
    import traceback

    try:
        estimate_comparison()
    except Exception as e:
        log.error(f"estimate comparison failed. Exception: {e}")
        traceback.print_exception(type(e), e, e.__traceback__)

    toast()
    Logger.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue