-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstudy5.py
More file actions
executable file
·66 lines (56 loc) · 1.98 KB
/
study5.py
File metadata and controls
executable file
·66 lines (56 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
from tqdm import tqdm
from pathlib import Path
# local imports
from evaluate import Evaluator
# --- Simulation size -------------------------------------------------------
N_SAMPLE = 1000  # observations drawn for each generated dataset
N_ITER = 500  # number of times the whole grid is repeated

# --- Experimental grid -----------------------------------------------------
BALANCED = [0.1, 0.3, 0.5, 0.7, 0.9]  # portion of positive cases
CONF = [0.1, 0.3, 0.5, 0.7, 0.9]  # confidence level

# Seed value for the random number generator.
SEED = 24061

# Results are written to "out/study5_n.csv" next to this script.
PATH_OUT = Path(__file__).resolve().parent / "out" / "study5_n.csv"

# Beta(a, b) shape parameters of the predicted scores, one pair for
# positive observations and one pair for negative observations.
BETA_POS_A, BETA_POS_B = 5, 2
BETA_NEG_A, BETA_NEG_B = 4, 3
def main():
    """Run the class-balance x confidence simulation and save the summary.

    For each of ``N_ITER`` iterations and each positive rate in
    ``BALANCED``, one synthetic dataset is generated and logged to the
    evaluator of every confidence level in ``CONF``. After all iterations
    the accumulated evaluators are passed to ``save_results``.
    """
    # Seed NumPy's global RNG so repeated runs generate the same data;
    # SEED was defined at module level but never applied.
    np.random.seed(SEED)

    dict_eval = init_evaluators()
    for _ in tqdm(range(N_ITER), desc="Iteration"):
        for b in BALANCED:
            # One dataset per positive rate, shared by all confidence levels.
            y_obs, y_pre = generate_data(b)
            for c in CONF:
                # Must match the key format used by init_evaluators().
                name = f"pos_{int(b * 10)}-conf_{int(c * 10)}"
                dict_eval[name].log(y_obs, y_pre, conf=c)
    save_results(dict_eval)
def init_evaluators():
    """Build one classification ``Evaluator`` per grid cell.

    Returns:
        A dict keyed by ``"pos_<p>-conf_<c>"``, where ``<p>`` and ``<c>``
        are the positive rate and confidence level multiplied by ten and
        truncated to an integer. Entries are ordered by ``BALANCED`` first,
        then ``CONF``.
    """
    return {
        f"pos_{int(b * 10)}-conf_{int(c * 10)}": Evaluator("classification")
        for b in BALANCED
        for c in CONF
    }
def generate_data(pos):
    """Generate one synthetic dataset of binary labels and predicted scores.

    Labels are drawn independently with probability ``pos`` of being 1.
    Each score is drawn from a Beta distribution whose shape parameters
    depend on the label: ``Beta(BETA_POS_A, BETA_POS_B)`` for positives and
    ``Beta(BETA_NEG_A, BETA_NEG_B)`` for negatives.

    Args:
        pos: Probability of a positive (1) observation, in [0, 1].

    Returns:
        Tuple ``(y_obs, y_pre)`` of arrays with length ``N_SAMPLE``:
        ``y_obs`` holds the 0/1 labels and ``y_pre`` the float scores.
    """
    y_obs = np.random.choice([0, 1], size=N_SAMPLE, p=[1 - pos, pos])

    # Draw all scores of each class in one vectorized call instead of one
    # scalar draw per observation. The distributions are unchanged; only
    # the order in which random numbers are consumed differs.
    is_pos = y_obs == 1
    n_pos = int(is_pos.sum())
    y_pre = np.empty(N_SAMPLE)
    y_pre[is_pos] = np.random.beta(BETA_POS_A, BETA_POS_B, size=n_pos)
    y_pre[~is_pos] = np.random.beta(
        BETA_NEG_A, BETA_NEG_B, size=N_SAMPLE - n_pos
    )
    return y_obs, y_pre
def save_results(dict_eval):
    """Append one wide-format summary row per evaluator to ``PATH_OUT``.

    For each evaluator, the frame returned by ``summary()`` is pivoted so
    that every metric becomes a column holding its ``mean`` value, and the
    ``pos`` / ``conf`` codes embedded in the trial name are added as
    columns.

    Args:
        dict_eval: Mapping of trial name (``"pos_<p>-conf_<c>"``) to an
            object exposing ``summary()``. The pivot requires that frame
            to contain ``metric`` and ``mean`` columns.
    """
    # to_csv does not create missing directories, so make sure "out/"
    # exists before the first write.
    PATH_OUT.parent.mkdir(parents=True, exist_ok=True)

    for key, evaluator in dict_eval.items():
        df = evaluator.summary()
        df["trial"] = key
        df = df.pivot(index="trial", columns="metric", values="mean").reset_index()

        # The trial name is constant within this frame, so the codes can be
        # parsed once from the key rather than row by row.
        parts = key.split("-")
        df["pos"] = parts[0].split("_")[1]
        df["conf"] = parts[1].split("_")[1]

        # The header is written only when the file is created; later frames
        # are appended. NOTE(review): a file left over from an earlier run
        # is appended to as well, not overwritten.
        if PATH_OUT.exists():
            df.to_csv(PATH_OUT, index=False, header=False, mode="a")
        else:
            df.to_csv(PATH_OUT, index=False)
# Run the simulation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()