# CrossPred-LVLM/active_eval.py at main · Qinyu-Allen-Zhao/CrossPred-LVLM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import argparse
import numpy as np

import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

from method.matrix import MatrixManager
from method.active_selector import RandomSelector, ActiveSelector, WorstSelector
from method.pmf import PMF
from utils.metric import recomm_eval, rmse

# Command-line interface: choose the selection strategy and the RNG seed.
# Each supported --method value maps to the selector class that implements it;
# only the chosen class is instantiated.
_SELECTOR_CLASSES = {
    "random": RandomSelector,  # Random baseline
    "active": ActiveSelector,  # Active learning
    "worst": WorstSelector,  # Worst case scenario
}

parser = argparse.ArgumentParser()
parser.add_argument(
    "--method",
    type=str,
    default="random",
    # Reject unknown methods at parse time. Previously an unrecognised value
    # left `selector` undefined and the script only failed with a NameError
    # after the baseline model had already been fitted.
    choices=sorted(_SELECTOR_CLASSES),
    help="random, active or worst",
)
parser.add_argument("--seed", type=int, default=0, help="random seed")
args = parser.parse_args()

# Seed NumPy's global RNG with the user-supplied seed.
np.random.seed(args.seed)

selector = _SELECTOR_CLASSES[args.method]()

# Load the train/test matrices together with the (mu, sigma) pair returned by
# the manager; NaN marks an entry that is absent from a matrix.
manager = MatrixManager()
train, test, mu, sigma = manager.load_data_for_pmf(percent_test=0.8)

# Count the non-NaN entries of each split; their total is handed to the
# selector later on.
num_test_observed = (~np.isnan(test)).sum()
num_samples = (~np.isnan(train)).sum() + num_test_observed

for label, value in (
    ("Train: ", train.shape),
    ("Test: ", test.shape),
    ("Num samples: ", num_samples),
    ("Num samples in test: ", num_test_observed),
):
    print(label, value)

# Baseline: fit PMF on the initial training matrix and score it on the test
# matrix. These metrics are the reference point for every later round.
model = PMF(train, dim=10, alpha=2, std=0.05)
model.draw_samples(draws=100, tune=500)
mcmc_pred, results = model.running_rmse(test, train, plot=False)
baseline_metrics = recomm_eval(mcmc_pred, test, mu, sigma, "PMF Original")
original_rmse, original_mae, original_r2 = baseline_metrics
print(f"Original RMSE: {original_rmse:.4f}")
print(f"Original MAE: {original_mae:.4f}")
print(f"Original R2: {original_r2:.4f}")

def _percent_gain(gain, baseline):
    """Return *gain* as a percentage of the magnitude of *baseline*.

    The magnitude is used so that the sign of the result always follows the
    sign of *gain*, even when the baseline metric is negative (R2 can be).
    A zero baseline has no meaningful relative change, so NaN is returned
    instead of dividing by zero.
    """
    if baseline == 0:
        return float("nan")
    return gain / abs(baseline) * 100


# Each round asks the selector for more entries, moves them from the test
# matrix into the training matrix, refits PMF and reports the metrics against
# the baseline. The ratios sum to 0.8, the same value as percent_test above
# (presumably so the final round has revealed every test entry -- confirm
# against the selector implementations).
for select_ratio in [0.05, 0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]:
    # Select what to evaluate next
    selected = selector.select(model, train, test, mu, sigma, num_samples, select_ratio)
    print("Selected: ", len(selected))

    # Copy the selected entries from the test matrix into the training matrix
    # (train is modified in place; test keeps its values).
    for idx in selected:
        train[idx] = test[idx]  # transfer to train set

    # Refit from scratch on the enlarged training matrix.
    model = PMF(train, dim=10, alpha=2, std=0.05)
    model.draw_samples(draws=100, tune=500)
    mcmc_pred, results = model.running_rmse(test, train, plot=False)

    # We know the actual value of every entry now present in train, so use it
    # in place of the model's prediction.
    known = ~np.isnan(train)
    mcmc_pred[known] = train[known]

    new_rmse, new_mae, new_r2 = recomm_eval(mcmc_pred, test, mu, sigma, "PMF Update")

    # RMSE and MAE improve when they decrease.
    print(f"RMSE: {new_rmse:.4f}")
    print(f"Improvement: {original_rmse - new_rmse:.4f}")
    print(f"Improvement (%): {_percent_gain(original_rmse - new_rmse, original_rmse):.2f}%")

    print(f"MAE: {new_mae:.4f}")
    print(f"Improvement: {original_mae - new_mae:.4f}")
    print(f"Improvement (%): {_percent_gain(original_mae - new_mae, original_mae):.2f}%")

    # R2 improves when it increases, hence the reversed subtraction.
    print(f"R2: {new_r2:.4f}")
    print(f"Improvement: {new_r2 - original_r2:.4f}")
    print(f"Improvement (%): {_percent_gain(new_r2 - original_r2, original_r2):.2f}%")