Commit fd282ef

Windows Build Support
Adds Windows build support via the PyTorch CUDA build extensions. Performance was not evaluated, but the results appear reasonable.
1 parent ba50a82 commit fd282ef

File tree

4 files changed: +135 -2 lines changed

lib/build_windows.bat

+3

@@ -0,0 +1,3 @@
+cd nms
+python setup_windows.py clean build_ext --inplace
+cd ../../
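The script is evidently meant to be run from lib\, so the final cd ../../ lands back at the repository root. Since build_ext --inplace leaves the compiled modules (.pyd files on Windows) next to their sources in lib/nms, a minimal post-build smoke test might look like the sketch below; the module names cpu_nms and gpu_nms come from setup_windows.py further down, but this test itself is not part of the commit.

# Minimal post-build smoke test (a sketch, not part of the commit).
# build_ext --inplace drops cpu_nms.pyd and gpu_nms.pyd next to the
# sources, so they are importable once lib/nms is on sys.path.
import sys
sys.path.insert(0, "lib/nms")

import cpu_nms  # compiled from cpu_nms.pyx
import gpu_nms  # compiled from nms_kernel.cu + gpu_nms.pyx

print(cpu_nms.__file__)
print(gpu_nms.__file__)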

lib/nms/gpu_nms.hpp

+8 -1

@@ -1,2 +1,9 @@
-void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+#if defined(__linux__)
+#define NMS_TYPE int
+#endif
+#if defined(_WIN64)
+#define NMS_TYPE long
+#endif
+
+void _nms(NMS_TYPE* keep_out, int* num_out, const float* boxes_host, int boxes_num,
           int boxes_dim, float nms_overlap_thresh, int device_id);
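The per-platform NMS_TYPE presumably matches the element type of the index buffer that the Cython wrapper allocates, which this diff does not show. One concrete platform difference worth keeping in mind: C long is 8 bytes under Linux's LP64 model but 4 bytes under Win64's LLP64 model, and on NumPy releases contemporary with this code np.int_ follows C long. A quick check, as a sketch:

# Sketch: observe the platform-dependent width of C 'long', which is the
# kind of mismatch the NMS_TYPE macro is guarding against.
# Linux x86-64 (LP64): sizeof(long) == 8; Win64 (LLP64): sizeof(long) == 4.
import ctypes
import numpy as np

print(ctypes.sizeof(ctypes.c_long))   # 8 on Linux x86-64, 4 on Win64
print(np.dtype(np.int_).itemsize)     # tracks C 'long' on NumPy 1.x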

lib/nms/nms_kernel.cu

+8 -1

@@ -8,6 +8,13 @@
 #include <vector>
 #include <iostream>

+#if defined(__linux__)
+#define NMS_TYPE int
+#endif
+#if defined(_WIN64)
+#define NMS_TYPE long
+#endif
+
 #define CUDA_CHECK(condition) \
 /* Code block avoids redefinition of cudaError_t error */ \
 do { \
@@ -87,7 +94,7 @@ void _set_device(int device_id) {
   CUDA_CHECK(cudaSetDevice(device_id));
 }

-void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+void _nms(NMS_TYPE* keep_out, int* num_out, const float* boxes_host, int boxes_num,
           int boxes_dim, float nms_overlap_thresh, int device_id) {
   _set_device(device_id);

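With the signature change in place, _nms still takes the detections as a flat float buffer. A usage sketch from the Python side, assuming the conventional py-faster-rcnn wrapper signature gpu_nms(dets, thresh, device_id); the .pyx wrapper itself is not part of this diff.

# Sketch of calling NMS through the compiled wrapper; the wrapper name and
# argument order are assumed from the common py-faster-rcnn convention.
import numpy as np
from gpu_nms import gpu_nms

# boxes_dim == 5 here: x1, y1, x2, y2, score
dets = np.array([[0,  0,  10, 10, 0.9],
                 [1,  1,  11, 11, 0.8],
                 [50, 50, 60, 60, 0.7]], dtype=np.float32)

keep = gpu_nms(dets, 0.5, 0)  # overlap threshold 0.5 on GPU 0
print(keep)                   # indices of the boxes that survive suppression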

lib/nms/setup_windows.py

+116

@@ -0,0 +1,116 @@
+# ------------------------------------------------------------------
+# Copyright (c) Nvidia
+# Licensed under BSD 3-Clause "New" or "Revised" License
+# Modified from Apex (https://github.com/NVIDIA/apex/)
+# ------------------------------------------------------------------
+
+import torch
+from setuptools import setup, find_packages
+import subprocess
+from distutils.extension import Extension
+
+import sys
+import warnings
+import os
+import numpy as np
+
+# ninja build does not work unless include_dirs are abs path
+this_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Obtain the numpy include directory. This logic works across numpy versions.
+try:
+    numpy_include = np.get_include()
+except AttributeError:
+    numpy_include = np.get_numpy_include()
+
+if not torch.cuda.is_available():
+    # https://github.com/NVIDIA/apex/issues/486
+    # Extension builds after https://github.com/pytorch/pytorch/pull/23408 attempt to query torch.cuda.get_device_capability(),
+    # which will fail if you are compiling in an environment without visible GPUs (e.g. during an nvidia-docker build command).
+    print('\nWarning: Torch did not find available GPUs on this system.\n',
+          'If your intention is to cross-compile, this is not an error.\n'
+          'Volta (compute capability 7.0), and Turing (compute capability 7.5).\n'
+          'If you wish to cross-compile for a single specific architecture,\n'
+          'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n')
+    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+
+print("torch.__version__ = ", torch.__version__)
+TORCH_MAJOR = int(torch.__version__.split('.')[0])
+TORCH_MINOR = int(torch.__version__.split('.')[1])
+
+cmdclass = {}
+ext_modules = []
+extras = {}
+
+from torch.utils.cpp_extension import BuildExtension
+from torch.utils.cpp_extension import CUDAExtension
+cmdclass['build_ext'] = BuildExtension
+
+def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
+    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
+    output = raw_output.split()
+    release_idx = output.index("release") + 1
+    release = output[release_idx].split(".")
+    bare_metal_major = release[0]
+    bare_metal_minor = release[1][0]
+    torch_binary_major = torch.version.cuda.split(".")[0]
+    torch_binary_minor = torch.version.cuda.split(".")[1]
+
+    print("\nCompiling cuda extensions with")
+    print(raw_output + "from " + cuda_dir + "/bin\n")
+
+    if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
+        raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
+                           "not match the version used to compile Pytorch binaries. " +
+                           "Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
+                           "In some cases, a minor-version mismatch will not cause later errors")
+
+# Set up macros for forward/backward compatibility hack around
+# https://github.com/pytorch/pytorch/commit/4404762d7dd955383acee92e6f06b48144a0742e
+# and
+# https://github.com/NVIDIA/apex/issues/456
+# https://github.com/pytorch/pytorch/commit/eb7b39e02f7d75c26d8a795ea8c7fd911334da7e#diff-4632522f237f1e4e728cb824300403ac
+version_ge_1_1 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 0):
+    version_ge_1_1 = ['-DVERSION_GE_1_1']
+version_ge_1_3 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2):
+    version_ge_1_3 = ['-DVERSION_GE_1_3']
+version_ge_1_5 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 4):
+    version_ge_1_5 = ['-DVERSION_GE_1_5']
+version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
+
+check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)
+
+ext_modules.append(
+    Extension(
+        "cpu_nms",
+        ["cpu_nms.pyx"],
+        extra_compile_args={'cxx': ['/MD']},
+        include_dirs=[numpy_include]
+    ),
+)
+
+ext_modules.append(
+    CUDAExtension(name='gpu_nms',
+                  sources=['nms_kernel.cu', 'gpu_nms.pyx'],
+                  include_dirs=[numpy_include],
+                  extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
+                                      'nvcc': ['-O3',
+                                               '-gencode', 'arch=compute_70,code=sm_70',
+                                               '-U__CUDA_NO_HALF_OPERATORS__',
+                                               '-U__CUDA_NO_HALF_CONVERSIONS__',
+                                               '--expt-relaxed-constexpr',
+                                               '--expt-extended-lambda',
+                                               '--use_fast_math'] + version_dependent_macros}))
+
+setup(
+    name='nms',
+    version='0.1',
+    description='',
+    ext_modules=ext_modules,
+    cmdclass=cmdclass,
+    extras_require=extras,
+)
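Note that the CUDAExtension above hardcodes -gencode arch=compute_70,code=sm_70, and code=sm_70 embeds no PTX for other architectures, so the resulting gpu_nms binary only runs on Volta-class GPUs. A small pre-build check, as a sketch that is not part of the commit:

# Sketch: verify the local GPU matches the hardcoded sm_70 target before
# starting a long build.
import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"GPU 0 compute capability: {major}.{minor}")
    if (major, minor) != (7, 0):
        print("Warning: setup_windows.py compiles gpu_nms for sm_70 only; "
              "edit the -gencode flags for this device.")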
