resnet50_general_inference_script.py
import argparse
import time

import torch
import torchvision.models as models


def inference(model, data):
    with torch.no_grad():
        # warm up
        for _ in range(100):
            model(data)

        # measure
        start = time.time()
        for _ in range(100):
            output = model(data)
        end = time.time()
        print("Inference took {:.2f} ms on average".format((end - start) / 100 * 1000))


def main(args):
    model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
    model.eval()

    data = torch.rand(128, 3, 224, 224)

    import intel_extension_for_pytorch as ipex

    # Channels-last memory format is generally faster for convolutions on CPU.
    model = model.to(memory_format=torch.channels_last)
    data = data.to(memory_format=torch.channels_last)

    if args.dtype == "float32":
        model = ipex.optimize(model, dtype=torch.float32)
    elif args.dtype == "bfloat16":
        model = ipex.optimize(model, dtype=torch.bfloat16)
    else:  # int8
        from intel_extension_for_pytorch.quantization import convert, prepare

        qconfig = ipex.quantization.default_static_qconfig_mapping
        model = prepare(model, qconfig, example_inputs=data, inplace=False)

        # calibration
        n_iter = 100
        with torch.no_grad():
            for _ in range(n_iter):
                model(data)

        model = convert(model)

    # Autocast only takes effect for bfloat16; tracing and freezing the model
    # applies graph-level optimizations and removes Python overhead.
    with torch.cpu.amp.autocast(enabled=args.dtype == "bfloat16"):
        with torch.no_grad():
            model = torch.jit.trace(model, data)
            model = torch.jit.freeze(model)

        inference(model, data)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dtype", default="float32", choices=["float32", "bfloat16", "int8"]
    )
    main(parser.parse_args())
    print("Execution finished")