-
Notifications
You must be signed in to change notification settings - Fork 311
Expand file tree
/
Copy pathevaluation.py
More file actions
167 lines (137 loc) · 5.59 KB
/
evaluation.py
File metadata and controls
167 lines (137 loc) · 5.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to evaluate a device model for the specified task."""
import random
from typing import Final
import torch
from datasets import load_dataset
from tqdm import tqdm
from modelopt.torch._deploy.device_model import DeviceModel
ACCURACY: Final[str] = "accuracy"
class ImageNetWrapper(torch.utils.data.Dataset):
    """Torch-style dataset adapter for the ILSVRC/imagenet-1k Hugging Face dataset.

    Yields ``(image, label)`` pairs, normalizing every image to RGB and
    applying an optional transform.
    """

    def __init__(self, hf_dataset, transform=None):
        """Store the backing dataset and the optional image transform.

        Args:
            hf_dataset: The Hugging Face dataset object.
            transform: Optional transform to apply to images.
        """
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of examples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at index ``idx``."""
        record = self.dataset[idx]
        img = record["image"]
        # Grayscale/CMYK/palette images must be converted before any transform.
        img = img if img.mode == "RGB" else img.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, record["label"]
def evaluate(
    model: torch.nn.Module | DeviceModel,
    transform,
    evaluation_type: str = ACCURACY,
    batch_size=1,
    num_examples=None,
    device="cuda",
    dataset_path="ILSVRC/imagenet-1k",
):
    """Evaluate a model on the imagenet-1k validation split.

    Args:
        model: PyTorch model or DeviceModel to evaluate.
        transform: Transform to apply to the dataset images.
        evaluation_type: Type of evaluation to perform. Currently only accuracy is supported.
        batch_size: Batch size to use for evaluation. Currently only batch_size=1 is supported.
        num_examples: Number of examples to evaluate on. If None, evaluate on the entire dataset.
        device: Device to run evaluation on. Supported devices: "cpu" and "cuda". Defaults to "cuda".
        dataset_path: HF dataset card or local path to the imagenet dataset. Defaults to "ILSVRC/imagenet-1k".

    Returns:
        The evaluation result.
    """
    # Pull only the validation files of imagenet-1k from Hugging Face.
    validation_split = load_dataset(
        dataset_path,
        split="validation",
        data_files={"validation": "data/validation*"},
        verification_mode="no_checks",
    )
    loader = torch.utils.data.DataLoader(
        ImageNetWrapper(validation_split, transform=transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    # TODO: Add support for segmentation tasks.
    if evaluation_type != ACCURACY:
        raise ValueError(f"Unsupported evaluation type: {evaluation_type}")
    return evaluate_accuracy(
        model, loader, num_examples, batch_size, topk=(1, 5), device=device
    )
def evaluate_accuracy(
    model, val_loader, num_examples, batch_size, topk=(1,), random_seed=None, device="cuda"
):
    """Evaluate the top-k accuracy of the model on the validation dataset.

    Args:
        model: Model to evaluate. A ``torch.nn.Module`` is moved to ``device``
            and put in eval mode; any other callable (e.g. a DeviceModel) is
            invoked with its inputs wrapped in a list.
        val_loader: DataLoader (or iterable of ``(inputs, labels)`` batches)
            for the validation dataset.
        num_examples: Number of examples to evaluate on. If None, evaluate on
            the entire dataset.
        batch_size: Batch size to use for evaluation (used to size the
            progress bar when ``num_examples`` is set).
        topk: function support topk accuracy. Return list of accuracy equal to topk length.
            example of usage `top1, top5 = evaluate_accuracy(..., topk=(1,5))`
            `top1, top5, top10 = evaluate_accuracy(..., topk=(1,5,10))`
        random_seed: Random seed to use for evaluation.
        device: Device to run evaluation on. Supported devices: "cpu" and "cuda". Defaults to "cuda".

    Returns:
        List of accuracies (in percent), one entry per value in ``topk``.
    """
    # Compare against None so that random_seed=0 (a valid seed) still seeds the RNGs.
    if random_seed is not None:
        torch.manual_seed(random_seed)
        torch.cuda.manual_seed_all(random_seed)
        random.seed(random_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    if isinstance(model, torch.nn.Module):
        model.eval()
        model = model.to(device)
    total = 0
    corrects = [0] * len(topk)
    # Inference only: disable autograd so no graphs are built or retained.
    with torch.no_grad():
        for _, (inputs, labels) in tqdm(
            enumerate(val_loader),
            total=num_examples // batch_size if num_examples is not None else len(val_loader),
            desc="Evaluation progress: ",
        ):
            if num_examples is not None and total >= num_examples:
                break
            # Forward pass; a non-nn.Module model expects a list of input tensors.
            if not isinstance(model, torch.nn.Module):
                inputs = [inputs]
            else:
                inputs = inputs.to(device)
            outputs = model(inputs)
            # A list result (DeviceModel) carries the logits as its first element.
            outputs = outputs[0] if isinstance(outputs, list) else outputs.data
            labels_size = labels.size(0)
            # The final batch may be smaller; keep only as many rows as labels.
            outputs = outputs[:labels_size]
            total += labels_size
            labels = labels.to(outputs.device)
            for ind, k in enumerate(topk):
                # Correct if the true label appears anywhere in the top-k predictions.
                _, predicted = torch.topk(outputs, k, dim=1)
                corrects[ind] += (predicted == labels.unsqueeze(1)).any(dim=1).sum().item()
    res = [100 * corr / total for corr in corrects]
    return res