From 104e21ea16d2e018018560d0a4b2074564c48a1b Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 11 May 2020 14:22:19 +0200 Subject: [PATCH 1/4] xxx --- lib/nms/setup_windows.py | 168 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 lib/nms/setup_windows.py diff --git a/lib/nms/setup_windows.py b/lib/nms/setup_windows.py new file mode 100644 index 00000000..e24858a8 --- /dev/null +++ b/lib/nms/setup_windows.py @@ -0,0 +1,168 @@ +# -------------------------------------------------------- +# Pose.gluon +# Copyright (c) 2018-present Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) +# -------------------------------------------------------- + +import os +from os.path import join as pjoin +from setuptools import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext +import numpy as np +from shutil import which + + +def find_in_path(name, path): + "Find a file in a search path" + # Adapted fom + # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = pjoin(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + + +def locate_cuda(): + """Locate the CUDA environment on the system + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + Starts by looking for the CUDA_PATH env variable. If not found, everything + is based on finding 'nvcc.exe' in the PATH. + """ + + # CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 + if 'CUDA_PATH1' in os.environ: + home = os.environ['CUDA_PATH'] + nvcc = pjoin(home, 'bin', 'nvcc.exe') + else: + nvcc = which('nvcc.exe') + if nvcc is None: + raise EnvironmentError('The nvcc binary could not be ' + 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib', 'x64')} + for k, v in cudaconfig.items(): + if not os.path.exists(v): + raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + + return cudaconfig +CUDA = locate_cuda() + + +# Obtain the numpy include directory. This logic works across numpy versions. +try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + #print(self.__class__.__dict__) + #print(self.src_extensions) + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + #self.set_executable('compiler', CUDA['nvcc']) + + # save references to the default compiler_so and _comple methods + #default_compiler_so = self.compiler_so + super = self.compile + + def compile(sources, + output_dir=None, macros=None, include_dirs=None, debug=0, + extra_preargs=None, extra_postargs=None, depends=None): + sources_cpp = [] + for src in sources: + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + args = [CUDA['nvcc']] + extra_postargs['nvcc'] + [src] + print(args) + if not self.initialized: + self.initialize() + compile_info = self._setup_compile(output_dir, macros, include_dirs, + sources, depends, extra_postargs) + macros, objects, extra_postargs, pp_opts, build = compile_info + self.spawn(args) + else: + sources_cpp.append(src) + + super(sources_cpp, + output_dir, macros, include_dirs, debug, + extra_preargs, extra_postargs['cl'], depends) + + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. + def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + print("!", src) + self.set_executable('compiler', CUDA['nvcc']) + # use only a subset of the extra_postargs, which are 1-1 translated + # from the extra_compile_args in the Extension class + postargs = extra_postargs['nvcc'] + else: + postargs = extra_postargs['cl'] + + super(obj, src, ext, cc_args, postargs, pp_opts) + # reset the default compiler_so, which we might have changed for cuda + #self.compiler_so = default_compiler_so + + # inject our redefined _compile method into the class + self.compile = compile + + +# run the customize_compiler +class custom_build_ext(build_ext): + def build_extensions(self): + customize_compiler_for_nvcc(self.compiler) + build_ext.build_extensions(self) + + +ext_modules = [ + Extension( + "cpu_nms", + ["cpu_nms.pyx"], + extra_compile_args={'cl': []}, + include_dirs = [numpy_include] + ), + Extension('gpu_nms', + ['nms_kernel.cu', 'gpu_nms.pyx'], + library_dirs=[CUDA['lib64']], + libraries=['cudart'], + language='c++', + runtime_library_dirs=[CUDA['lib64']], + # this syntax is specific to this build system + # we're only going to use certain compiler args with nvcc and not with + # gcc the implementation of this trick is in customize_compiler() below + extra_compile_args={'cl': [], + 'nvcc': ['-arch=sm_35', + '--ptxas-options=-v', + '-c', + '--compiler-options', + "'-fPIC'"]}, + include_dirs = [numpy_include, CUDA['include']] + ), +] + +setup( + name='nms', + ext_modules=ext_modules, + # inject our custom trigger + cmdclass={'build_ext': custom_build_ext}, +) From fa67d88033b91dd0aaf04aa01955e3fde22c039a Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 12 May 2020 00:26:34 +0200 Subject: [PATCH 2/4] build nms on Windows --- lib/nms/nms_kernel_externc.cu | 6 ++ lib/nms/setup_windows.py | 164 ++++++++++------------------------ 2 files changed, 53 insertions(+), 117 deletions(-) create mode 100644 lib/nms/nms_kernel_externc.cu diff --git a/lib/nms/nms_kernel_externc.cu b/lib/nms/nms_kernel_externc.cu new file mode 100644 index 00000000..ef6df492 --- /dev/null +++ b/lib/nms/nms_kernel_externc.cu @@ -0,0 +1,6 @@ +// +// by happy coincidence the first thing encountered in nms_kernel.cu is +// the declaration of _nms, so extern "C" will work on that declaration. +// +extern "C" +#include "nms_kernel.cu" diff --git a/lib/nms/setup_windows.py b/lib/nms/setup_windows.py index e24858a8..1efb2d28 100644 --- a/lib/nms/setup_windows.py +++ b/lib/nms/setup_windows.py @@ -4,27 +4,32 @@ # Licensed under The MIT License [see LICENSE for details] # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) # -------------------------------------------------------- +# +# Adapted for building the extensions on Windows. +# +# All this file needs to do is to build two .pyd files from 3 source files, +# so rather than shoe-horning nvcc into distutils.extension, just build the +# two extensions "ad hoc" +# import os from os.path import join as pjoin from setuptools import setup -from distutils.extension import Extension -from Cython.Distutils import build_ext +#from distutils.extension import Extension +#from Cython.Distutils import build_ext import numpy as np -from shutil import which +import shutil +import sysconfig +# -------------------------------------------------------- -def find_in_path(name, path): - "Find a file in a search path" - # Adapted fom - # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ - for dir in path.split(os.pathsep): - binpath = pjoin(dir, name) - if os.path.exists(binpath): - return os.path.abspath(binpath) - return None - +if shutil.which('cython.exe') is None: + raise RuntimeError('cython.exe must be in your path.') +if shutil.which('cl.exe') is None: + raise RuntimeError('Can\'t find "cl.exe". This script must be run inside a Visual Studio environment.') + + def locate_cuda(): """Locate the CUDA environment on the system Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' @@ -38,7 +43,7 @@ def locate_cuda(): home = os.environ['CUDA_PATH'] nvcc = pjoin(home, 'bin', 'nvcc.exe') else: - nvcc = which('nvcc.exe') + nvcc = shutil.which('nvcc.exe') if nvcc is None: raise EnvironmentError('The nvcc binary could not be ' 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') @@ -52,6 +57,8 @@ def locate_cuda(): raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig + + CUDA = locate_cuda() @@ -61,108 +68,31 @@ def locate_cuda(): except AttributeError: numpy_include = np.get_numpy_include() +syspaths = sysconfig.get_paths() +python_include = syspaths['include'] +python_lib = syspaths['stdlib'] + +# -------------------------------------------------------- +# +# build cpu_nms.pyd +# -def customize_compiler_for_nvcc(self): - """inject deep into distutils to customize how the dispatch - to gcc/nvcc works. - If you subclass UnixCCompiler, it's not trivial to get your subclass - injected in, and still have the right customizations (i.e. - distutils.sysconfig.customize_compiler) run on it. So instead of going - the OO route, I have this. Note, it's kindof like a wierd functional - subclassing going on.""" +command = "cython cpu_nms.pyx" +os.system(command) + +command = "cl /D_USRDLL /D_WINDLL /I{} /I{} cpu_nms.c /link /DLL /LIBPATH:{}s /OUT:cpu_nms.pyd".format(python_include,numpy_include, python_lib) +os.system(command) + +# -------------------------------------------------------- +# +# build gpu_nms.pyd +# +command = "cython gpu_nms.pyx" +os.system(command) + +command = '"{}" -c -arch=sm_35 --ptxas-options=-v -I{} -I{} nms_kernel_externc.cu'.format(CUDA['nvcc'], python_include, numpy_include) +os.system(command) + +command = 'cl /D_USRDLL /D_WINDLL /I{} /I{} gpu_nms.c nms_kernel_externc.obj "{}\cudart.lib" /link /DLL /LIBPATH:{}s /OUT:gpu_nms.pyd'.format(python_include, numpy_include, CUDA['lib64'], python_lib) +os.system(command) - #print(self.__class__.__dict__) - #print(self.src_extensions) - - # tell the compiler it can processes .cu - self.src_extensions.append('.cu') - #self.set_executable('compiler', CUDA['nvcc']) - - # save references to the default compiler_so and _comple methods - #default_compiler_so = self.compiler_so - super = self.compile - - def compile(sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - sources_cpp = [] - for src in sources: - if os.path.splitext(src)[1] == '.cu': - # use the cuda for .cu files - args = [CUDA['nvcc']] + extra_postargs['nvcc'] + [src] - print(args) - if not self.initialized: - self.initialize() - compile_info = self._setup_compile(output_dir, macros, include_dirs, - sources, depends, extra_postargs) - macros, objects, extra_postargs, pp_opts, build = compile_info - self.spawn(args) - else: - sources_cpp.append(src) - - super(sources_cpp, - output_dir, macros, include_dirs, debug, - extra_preargs, extra_postargs['cl'], depends) - - - # now redefine the _compile method. This gets executed for each - # object but distutils doesn't have the ability to change compilers - # based on source extension: we add it. - def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - if os.path.splitext(src)[1] == '.cu': - # use the cuda for .cu files - print("!", src) - self.set_executable('compiler', CUDA['nvcc']) - # use only a subset of the extra_postargs, which are 1-1 translated - # from the extra_compile_args in the Extension class - postargs = extra_postargs['nvcc'] - else: - postargs = extra_postargs['cl'] - - super(obj, src, ext, cc_args, postargs, pp_opts) - # reset the default compiler_so, which we might have changed for cuda - #self.compiler_so = default_compiler_so - - # inject our redefined _compile method into the class - self.compile = compile - - -# run the customize_compiler -class custom_build_ext(build_ext): - def build_extensions(self): - customize_compiler_for_nvcc(self.compiler) - build_ext.build_extensions(self) - - -ext_modules = [ - Extension( - "cpu_nms", - ["cpu_nms.pyx"], - extra_compile_args={'cl': []}, - include_dirs = [numpy_include] - ), - Extension('gpu_nms', - ['nms_kernel.cu', 'gpu_nms.pyx'], - library_dirs=[CUDA['lib64']], - libraries=['cudart'], - language='c++', - runtime_library_dirs=[CUDA['lib64']], - # this syntax is specific to this build system - # we're only going to use certain compiler args with nvcc and not with - # gcc the implementation of this trick is in customize_compiler() below - extra_compile_args={'cl': [], - 'nvcc': ['-arch=sm_35', - '--ptxas-options=-v', - '-c', - '--compiler-options', - "'-fPIC'"]}, - include_dirs = [numpy_include, CUDA['include']] - ), -] - -setup( - name='nms', - ext_modules=ext_modules, - # inject our custom trigger - cmdclass={'build_ext': custom_build_ext}, -) From 66a0483db3a7e497ab00192d25e67f472e48bd48 Mon Sep 17 00:00:00 2001 From: zmic Date: Tue, 12 May 2020 14:50:40 +0200 Subject: [PATCH 3/4] Update README.md --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index d984f854..8f0e56eb 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ +# How to build on Windows + +- Install Visual Studio 2017 or 2019 with C++ options. +- Open "x64 Native Tools Command Prompt for VS 201x" +- cd lib\nms +- python build_windows.py + + +# Original readme: + # Deep High-Resolution Representation Learning for Human Pose Estimation (CVPR 2019) ## News - [2020/03/13] A longer version is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf). It includes more HRNet applications, and the codes are available: [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation), [objection detection](https://github.com/HRNet/HRNet-Object-Detection), [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection), and [image classification](https://github.com/HRNet/HRNet-Image-Classification). From 10946163545a624da4455e0410ea612c7699c70d Mon Sep 17 00:00:00 2001 From: zmic <1315889+zmic@users.noreply.github.com> Date: Tue, 12 May 2020 18:46:32 +0200 Subject: [PATCH 4/4] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 8f0e56eb..b3ef802b 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,9 @@ - cd lib\nms - python build_windows.py +# cocoapi +- pip install pycocoapi-windows + # Original readme: