From 104e21ea16d2e018018560d0a4b2074564c48a1b Mon Sep 17 00:00:00 2001
From: michael <michael.vanslembrouck@gmail.com>
Date: Mon, 11 May 2020 14:22:19 +0200
Subject: [PATCH 1/4] Add lib/nms/setup_windows.py for building nms on Windows

---
 lib/nms/setup_windows.py | 168 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 lib/nms/setup_windows.py

diff --git a/lib/nms/setup_windows.py b/lib/nms/setup_windows.py
new file mode 100644
index 00000000..e24858a8
--- /dev/null
+++ b/lib/nms/setup_windows.py
@@ -0,0 +1,168 @@
+# --------------------------------------------------------
+# Pose.gluon
+# Copyright (c) 2018-present Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
+# --------------------------------------------------------
+
+import os
+from os.path import join as pjoin
+from setuptools import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+import numpy as np
+from shutil import which
+
+
+def find_in_path(name, path):
+    "Find a file in a search path"
+    # Adapted fom
+    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
+    for dir in path.split(os.pathsep):
+        binpath = pjoin(dir, name)
+        if os.path.exists(binpath):
+            return os.path.abspath(binpath)
+    return None
+
+
+def locate_cuda():
+    """Locate the CUDA environment on the system
+    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
+    and values giving the path to each file or directory.
+    Uses the CUDA_PATH env variable when it is set; otherwise everything is
+    derived from the location of 'nvcc.exe' on the PATH.
+    Raises EnvironmentError if nvcc or any derived path does not exist.
+    """
+    # e.g. CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
+    if 'CUDA_PATH' in os.environ:
+        home = os.environ['CUDA_PATH']
+        nvcc = pjoin(home, 'bin', 'nvcc.exe')
+    else:
+        nvcc = which('nvcc.exe')
+        if nvcc is None:
+            raise EnvironmentError('The nvcc binary could not be '
+                'located in your $PATH. Either add it to your path, or set $CUDA_PATH')
+        home = os.path.dirname(os.path.dirname(nvcc))
+
+    cudaconfig = {'home': home, 'nvcc': nvcc,
+                  'include': pjoin(home, 'include'),
+                  'lib64': pjoin(home, 'lib', 'x64')}
+    for k, v in cudaconfig.items():
+        if not os.path.exists(v):
+            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
+
+    return cudaconfig
+CUDA = locate_cuda()
+
+
+# Obtain the numpy include directory.  This logic works across numpy versions.
+try:
+    numpy_include = np.get_include()
+except AttributeError:
+    numpy_include = np.get_numpy_include()
+
+
+def customize_compiler_for_nvcc(self):
+    """inject deep into distutils to customize how the dispatch
+    to gcc/nvcc works.
+    If you subclass UnixCCompiler, it's not trivial to get your subclass
+    injected in, and still have the right customizations (i.e.
+    distutils.sysconfig.customize_compiler) run on it. So instead of going
+    the OO route, I have this. Note, it's kindof like a wierd functional
+    subclassing going on."""
+
+    #print(self.__class__.__dict__)
+    #print(self.src_extensions)
+    
+    # tell the compiler it can processes .cu
+    self.src_extensions.append('.cu')
+    #self.set_executable('compiler', CUDA['nvcc'])
+
+    # save references to the default compiler_so and _comple methods
+    #default_compiler_so = self.compiler_so
+    super = self.compile
+
+    def compile(sources,
+                output_dir=None, macros=None, include_dirs=None, debug=0,
+                extra_preargs=None, extra_postargs=None, depends=None):
+        sources_cpp = []
+        for src in sources:
+            if os.path.splitext(src)[1] == '.cu':
+                # use the cuda for .cu files
+                args = [CUDA['nvcc']] + extra_postargs['nvcc'] + [src]
+                print(args)
+                if not self.initialized:
+                    self.initialize()
+                compile_info = self._setup_compile(output_dir, macros, include_dirs,
+                                           sources, depends, extra_postargs)
+                macros, objects, extra_postargs, pp_opts, build = compile_info                    
+                self.spawn(args)
+            else:
+                sources_cpp.append(src)
+        
+        super(sources_cpp,
+              output_dir, macros, include_dirs, debug,
+              extra_preargs, extra_postargs['cl'], depends)
+
+
+    # now redefine the _compile method. This gets executed for each
+    # object but distutils doesn't have the ability to change compilers
+    # based on source extension: we add it.
+    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
+        if os.path.splitext(src)[1] == '.cu':
+            # use the cuda for .cu files
+            print("!", src)
+            self.set_executable('compiler', CUDA['nvcc'])
+            # use only a subset of the extra_postargs, which are 1-1 translated
+            # from the extra_compile_args in the Extension class
+            postargs = extra_postargs['nvcc']
+        else:
+            postargs = extra_postargs['cl']
+
+        super(obj, src, ext, cc_args, postargs, pp_opts)
+        # reset the default compiler_so, which we might have changed for cuda
+        #self.compiler_so = default_compiler_so
+
+    # inject our redefined _compile method into the class
+    self.compile = compile
+
+
+# run the customize_compiler
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        customize_compiler_for_nvcc(self.compiler)
+        build_ext.build_extensions(self)
+
+
+ext_modules = [
+    Extension(
+        "cpu_nms",
+        ["cpu_nms.pyx"],
+        extra_compile_args={'cl': []},
+        include_dirs = [numpy_include]
+    ),
+    Extension('gpu_nms',
+        ['nms_kernel.cu', 'gpu_nms.pyx'],
+        library_dirs=[CUDA['lib64']],
+        libraries=['cudart'],
+        language='c++',
+        runtime_library_dirs=[CUDA['lib64']],
+        # this syntax is specific to this build system
+        # we're only going to use certain compiler args with nvcc and not with
+        # gcc the implementation of this trick is in customize_compiler() below
+        extra_compile_args={'cl': [],
+                            'nvcc': ['-arch=sm_35',
+                                     '--ptxas-options=-v',
+                                     '-c',
+                                     '--compiler-options',
+                                     "'-fPIC'"]},
+        include_dirs = [numpy_include, CUDA['include']]
+    ),
+]
+
+setup(
+    name='nms',
+    ext_modules=ext_modules,
+    # inject our custom trigger
+    cmdclass={'build_ext': custom_build_ext},
+)

From fa67d88033b91dd0aaf04aa01955e3fde22c039a Mon Sep 17 00:00:00 2001
From: michael <michael.vanslembrouck@gmail.com>
Date: Tue, 12 May 2020 00:26:34 +0200
Subject: [PATCH 2/4] build nms on Windows

---
 lib/nms/nms_kernel_externc.cu |   6 ++
 lib/nms/setup_windows.py      | 164 ++++++++++------------------------
 2 files changed, 53 insertions(+), 117 deletions(-)
 create mode 100644 lib/nms/nms_kernel_externc.cu

diff --git a/lib/nms/nms_kernel_externc.cu b/lib/nms/nms_kernel_externc.cu
new file mode 100644
index 00000000..ef6df492
--- /dev/null
+++ b/lib/nms/nms_kernel_externc.cu
@@ -0,0 +1,6 @@
+//
+// extern "C" applies only to the declaration that follows it. By happy
+// coincidence the first thing encountered in nms_kernel.cu is the
+// declaration of _nms, so that is the declaration extern "C" works on.
+extern "C"
+#include "nms_kernel.cu"
diff --git a/lib/nms/setup_windows.py b/lib/nms/setup_windows.py
index e24858a8..1efb2d28 100644
--- a/lib/nms/setup_windows.py
+++ b/lib/nms/setup_windows.py
@@ -4,27 +4,32 @@
 # Licensed under The MIT License [see LICENSE for details]
 # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
 # --------------------------------------------------------
+#
+# Adapted for building the extensions on Windows. 
+#
+# All this file needs to do is to build two .pyd files from 3 source files,
+# so rather than shoe-horning nvcc into distutils.extension, just build the 
+# two extensions "ad hoc" 
+# 
 
 import os
 from os.path import join as pjoin
 from setuptools import setup
-from distutils.extension import Extension
-from Cython.Distutils import build_ext
+#from distutils.extension import Extension
+#from Cython.Distutils import build_ext
 import numpy as np
-from shutil import which
+import shutil
+import sysconfig
 
+# --------------------------------------------------------
 
-def find_in_path(name, path):
-    "Find a file in a search path"
-    # Adapted fom
-    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
-    for dir in path.split(os.pathsep):
-        binpath = pjoin(dir, name)
-        if os.path.exists(binpath):
-            return os.path.abspath(binpath)
-    return None
-
+if shutil.which('cython.exe') is None:
+    raise RuntimeError('cython.exe must be in your path.')
 
+if shutil.which('cl.exe') is None:
+    raise RuntimeError('Can\'t find "cl.exe". This script must be run inside a Visual Studio environment.')
+    
+    
 def locate_cuda():
     """Locate the CUDA environment on the system
     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
@@ -38,7 +43,7 @@ def locate_cuda():
         home = os.environ['CUDA_PATH']
         nvcc = pjoin(home, 'bin', 'nvcc.exe')
     else:
-        nvcc = which('nvcc.exe')
+        nvcc = shutil.which('nvcc.exe')
         if nvcc is None:
             raise EnvironmentError('The nvcc binary could not be '
                 'located in your $PATH. Either add it to your path, or set $CUDA_PATH')
@@ -52,6 +57,8 @@ def locate_cuda():
             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 
     return cudaconfig
+    
+    
 CUDA = locate_cuda()
 
 
@@ -61,108 +68,31 @@ def locate_cuda():
 except AttributeError:
     numpy_include = np.get_numpy_include()
 
+syspaths = sysconfig.get_paths()
+python_include = syspaths['include']
+python_lib = syspaths['stdlib']
+
+# --------------------------------------------------------
+#
+# build cpu_nms.pyd
+#
 
-def customize_compiler_for_nvcc(self):
-    """inject deep into distutils to customize how the dispatch
-    to gcc/nvcc works.
-    If you subclass UnixCCompiler, it's not trivial to get your subclass
-    injected in, and still have the right customizations (i.e.
-    distutils.sysconfig.customize_compiler) run on it. So instead of going
-    the OO route, I have this. Note, it's kindof like a wierd functional
-    subclassing going on."""
+def run(command):
+    """Run one build command; raise CalledProcessError if it exits non-zero."""
+    import subprocess  # local import so this helper is self-contained
+    # No cmd.exe in between (unlike os.system), so quoted paths arrive intact.
+    subprocess.check_call(command)
+
+
+# python_lib is normally <prefix>\Lib (sysconfig 'stdlib'); + 's' -> <prefix>\Libs.
+run('cython cpu_nms.pyx')
+run('cl /D_USRDLL /D_WINDLL /I"{}" /I"{}" cpu_nms.c /link /DLL /LIBPATH:"{}s" /OUT:cpu_nms.pyd'.format(python_include, numpy_include, python_lib))
+
+# --------------------------------------------------------
+#
+# build gpu_nms.pyd
+#
+run('cython gpu_nms.pyx')
+run('"{}" -c -arch=sm_35 --ptxas-options=-v -I"{}" -I"{}" nms_kernel_externc.cu'.format(CUDA['nvcc'], python_include, numpy_include))
+run('cl /D_USRDLL /D_WINDLL /I"{}" /I"{}" gpu_nms.c nms_kernel_externc.obj "{}\\cudart.lib" /link /DLL /LIBPATH:"{}s" /OUT:gpu_nms.pyd'.format(python_include, numpy_include, CUDA['lib64'], python_lib))
 
-    #print(self.__class__.__dict__)
-    #print(self.src_extensions)
-    
-    # tell the compiler it can processes .cu
-    self.src_extensions.append('.cu')
-    #self.set_executable('compiler', CUDA['nvcc'])
-
-    # save references to the default compiler_so and _comple methods
-    #default_compiler_so = self.compiler_so
-    super = self.compile
-
-    def compile(sources,
-                output_dir=None, macros=None, include_dirs=None, debug=0,
-                extra_preargs=None, extra_postargs=None, depends=None):
-        sources_cpp = []
-        for src in sources:
-            if os.path.splitext(src)[1] == '.cu':
-                # use the cuda for .cu files
-                args = [CUDA['nvcc']] + extra_postargs['nvcc'] + [src]
-                print(args)
-                if not self.initialized:
-                    self.initialize()
-                compile_info = self._setup_compile(output_dir, macros, include_dirs,
-                                           sources, depends, extra_postargs)
-                macros, objects, extra_postargs, pp_opts, build = compile_info                    
-                self.spawn(args)
-            else:
-                sources_cpp.append(src)
-        
-        super(sources_cpp,
-              output_dir, macros, include_dirs, debug,
-              extra_preargs, extra_postargs['cl'], depends)
-
-
-    # now redefine the _compile method. This gets executed for each
-    # object but distutils doesn't have the ability to change compilers
-    # based on source extension: we add it.
-    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
-        if os.path.splitext(src)[1] == '.cu':
-            # use the cuda for .cu files
-            print("!", src)
-            self.set_executable('compiler', CUDA['nvcc'])
-            # use only a subset of the extra_postargs, which are 1-1 translated
-            # from the extra_compile_args in the Extension class
-            postargs = extra_postargs['nvcc']
-        else:
-            postargs = extra_postargs['cl']
-
-        super(obj, src, ext, cc_args, postargs, pp_opts)
-        # reset the default compiler_so, which we might have changed for cuda
-        #self.compiler_so = default_compiler_so
-
-    # inject our redefined _compile method into the class
-    self.compile = compile
-
-
-# run the customize_compiler
-class custom_build_ext(build_ext):
-    def build_extensions(self):
-        customize_compiler_for_nvcc(self.compiler)
-        build_ext.build_extensions(self)
-
-
-ext_modules = [
-    Extension(
-        "cpu_nms",
-        ["cpu_nms.pyx"],
-        extra_compile_args={'cl': []},
-        include_dirs = [numpy_include]
-    ),
-    Extension('gpu_nms',
-        ['nms_kernel.cu', 'gpu_nms.pyx'],
-        library_dirs=[CUDA['lib64']],
-        libraries=['cudart'],
-        language='c++',
-        runtime_library_dirs=[CUDA['lib64']],
-        # this syntax is specific to this build system
-        # we're only going to use certain compiler args with nvcc and not with
-        # gcc the implementation of this trick is in customize_compiler() below
-        extra_compile_args={'cl': [],
-                            'nvcc': ['-arch=sm_35',
-                                     '--ptxas-options=-v',
-                                     '-c',
-                                     '--compiler-options',
-                                     "'-fPIC'"]},
-        include_dirs = [numpy_include, CUDA['include']]
-    ),
-]
-
-setup(
-    name='nms',
-    ext_modules=ext_modules,
-    # inject our custom trigger
-    cmdclass={'build_ext': custom_build_ext},
-)

From 66a0483db3a7e497ab00192d25e67f472e48bd48 Mon Sep 17 00:00:00 2001
From: zmic <michael.vanslembrouck@gmail.com>
Date: Tue, 12 May 2020 14:50:40 +0200
Subject: [PATCH 3/4] Update README.md

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index d984f854..8f0e56eb 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,13 @@
+# How to build on Windows
+
+- Install Visual Studio 2017 or 2019 with C++ options.
+- Open "x64 Native Tools Command Prompt for VS 201x"
+- cd lib\nms
+- python setup_windows.py
+
+
+# Original readme:
+
 # Deep High-Resolution Representation Learning for Human Pose Estimation (CVPR 2019)
 ## News
 - [2020/03/13] A longer version is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf). It includes more HRNet applications, and the codes are available: [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation),  [objection detection](https://github.com/HRNet/HRNet-Object-Detection),  [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection), and [image classification](https://github.com/HRNet/HRNet-Image-Classification).

From 10946163545a624da4455e0410ea612c7699c70d Mon Sep 17 00:00:00 2001
From: zmic <1315889+zmic@users.noreply.github.com>
Date: Tue, 12 May 2020 18:46:32 +0200
Subject: [PATCH 4/4] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 8f0e56eb..b3ef802b 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,9 @@
 - cd lib\nms
 - python setup_windows.py
 
+# cocoapi
+- pip install pycocoapi-windows
+
 
 # Original readme: