diff --git a/AUTHORS b/AUTHORS
index 46084c30039d4..c17ac199be99e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -231,4 +231,9 @@ a license to everyone to use it as detailed in LICENSE.)
 * Noam T.Cohen <noam@ecb.co.il>
 * Nick Shin <nick.shin@gmail.com>
 * Gregg Tavares <github@greggman.com>
-
+* Tanner Rogalsky <tanner@tannerrogalsky.com>
+* Richard Cook <rcook@tableau.com> (copyright owned by Tableau Software, Inc.)
+* Arnab Choudhury <achoudhury@tableau.com> (copyright owned by Tableau Software, Inc.)
+* Charles Vaughn <cvaughn@tableau.com> (copyright owned by Tableau Software, Inc.)
+* Pierre Krieger <pierre.krieger1708@gmail.com>
+* Jakob Stoklund Olesen <stoklund@2pi.dk>
diff --git a/emcc.py b/emcc.py
index 16ad6ac12f682..6d422c27a7ee3 100755
--- a/emcc.py
+++ b/emcc.py
@@ -685,6 +685,24 @@ def validate_arg_level(level_string, max_level, err_msg):
         newargs.append('-D__SSE__=1')
         newargs.append('-D__SSE2__=1')
         newargs[i] = ''
+      elif newargs[i] == '-msse3':
+        newargs.append('-D__SSE__=1')
+        newargs.append('-D__SSE2__=1')
+        newargs.append('-D__SSE3__=1')
+        newargs[i] = ''
+      elif newargs[i] == '-mssse3':
+        newargs.append('-D__SSE__=1')
+        newargs.append('-D__SSE2__=1')
+        newargs.append('-D__SSE3__=1')
+        newargs.append('-D__SSSE3__=1')
+        newargs[i] = ''
+      elif newargs[i] == '-msse4.1':
+        newargs.append('-D__SSE__=1')
+        newargs.append('-D__SSE2__=1')
+        newargs.append('-D__SSE3__=1')
+        newargs.append('-D__SSSE3__=1')
+        newargs.append('-D__SSE4_1__=1')
+        newargs[i] = ''
 
     if should_exit:
       sys.exit(0)
@@ -753,7 +771,7 @@ def validate_arg_level(level_string, max_level, err_msg):
 
       if i > 0:
         prev = newargs[i-1]
-        if prev in ['-MT', '-MF', '-MQ', '-D', '-U', '-o', '-x', '-Xpreprocessor', '-include', '-imacros', '-idirafter', '-iprefix', '-iwithprefix', '-iwithprefixbefore', '-isysroot', '-imultilib', '-A', '-isystem', '-iquote', '-install_name', '-compatibility_version', '-current_version', '-I', '-L']: continue # ignore this gcc-style argument
+        if prev in ['-MT', '-MF', '-MQ', '-D', '-U', '-o', '-x', '-Xpreprocessor', '-include', '-imacros', '-idirafter', '-iprefix', '-iwithprefix', '-iwithprefixbefore', '-isysroot', '-imultilib', '-A', '-isystem', '-iquote', '-install_name', '-compatibility_version', '-current_version', '-I', '-L', '-include-pch']: continue # ignore this gcc-style argument
 
       if os.path.islink(arg) and os.path.realpath(arg).endswith(SOURCE_ENDINGS + BITCODE_ENDINGS + DYNAMICLIB_ENDINGS + ASSEMBLY_ENDINGS + HEADER_ENDINGS):
         arg = os.path.realpath(arg)
@@ -955,9 +973,6 @@ def check(input_file):
     elif shared.Settings.SIDE_MODULE:
       assert not shared.Settings.MAIN_MODULE
       memory_init_file = False # memory init file is not supported with side modules, must be executable synchronously (for dlopen)
-      if shared.Settings.WASM:
-        logging.warning('disabling WASM in SIDE_MODULE')
-        shared.Settings.WASM = 0
 
     if shared.Settings.MAIN_MODULE or shared.Settings.SIDE_MODULE:
       assert shared.Settings.ASM_JS, 'module linking requires asm.js output (-s ASM_JS=1)'
@@ -972,9 +987,6 @@ def check(input_file):
       logging.warning('not all asm.js optimizations are possible with ALLOW_MEMORY_GROWTH, disabling those')
       shared.Settings.ASM_JS = 2 # memory growth does not validate as asm.js http://discourse.wicg.io/t/request-for-comments-switching-resizing-heaps-in-asm-js/641/23
 
-    if shared.Settings.WASM:
-      assert not shared.Settings.ALLOW_MEMORY_GROWTH, 'memory growth is not supported with WASM=1'
-
     if shared.Settings.EMULATE_FUNCTION_POINTER_CASTS:
       shared.Settings.ALIASING_FUNCTION_POINTERS = 0
 
@@ -1046,8 +1058,9 @@ def check(input_file):
     if proxy_to_worker:
       shared.Settings.PROXY_TO_WORKER = 1
 
-    if use_preload_plugins:
-      shared.Settings.FORCE_FILESYSTEM = 1 # preload plugins require preload support which is part of the filesystem
+    if use_preload_plugins or len(preload_files) > 0 or len(embed_files) > 0:
+      # if we include any files, or intend to use preload plugins, then we definitely need filesystem support
+      shared.Settings.FORCE_FILESYSTEM = 1
 
     if proxy_to_worker or use_preload_plugins:
       shared.Settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE += ['$Browser']
@@ -1639,6 +1652,11 @@ def do_minify(): # minifies the code. this is also when we do certain optimizati
         else:
           JSOptimizer.queue += ['registerize']
 
+      # NOTE: Important that this comes after registerize/registerizeHarder
+      if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS and opt_level >= 2:
+        JSOptimizer.flush()
+        shared.Building.eliminate_duplicate_funcs(final)
+
       if not shared.Settings.EMTERPRETIFY:
         do_minify()
 
@@ -1905,11 +1923,6 @@ def un_src(): # use this if you want to modify the script and need it to be inli
         worker_target_basename = target_basename + '.worker'
         open(target, 'w').write(open(shared.path_from_root('src', 'webGLClient.js')).read() + '\n' + open(shared.path_from_root('src', 'proxyClient.js')).read().replace('{{{ filename }}}', shared.Settings.PROXY_TO_WORKER_FILENAME or worker_target_basename).replace('{{{ IDBStore.js }}}', open(shared.path_from_root('src', 'IDBStore.js')).read()))
 
-    if shared.Settings.WASM:
-      logging.debug('converting to WebAssembly')
-      wasm_target = unsuffixed(js_target) + '.wasm'
-      subprocess.check_call([shared.PYTHON, shared.path_from_root('third_party', 'wasm-polyfill', 'wasmator.py'), js_target, wasm_target, shared.Settings.EXPORT_NAME])
-
     log_time('final emitting')
 
     if DEBUG: logging.debug('total time: %.2f seconds', (time.time() - start_time))
diff --git a/emscripten-version.txt b/emscripten-version.txt
index b6b783386b601..b9808b21d5a2e 100644
--- a/emscripten-version.txt
+++ b/emscripten-version.txt
@@ -1,2 +1,2 @@
-"1.35.23"
+"1.36.0"
 
diff --git a/site/source/docs/getting_started/FAQ.rst b/site/source/docs/getting_started/FAQ.rst
index ce71499f2bbe1..3274aa521323f 100644
--- a/site/source/docs/getting_started/FAQ.rst
+++ b/site/source/docs/getting_started/FAQ.rst
@@ -64,6 +64,10 @@ The main tips for improving build time are:
 
 - When you have multiple bitcode files as inputs, put the largest file first (LLVM linking links the second and later ones into the first, so less copying is done on the first input to the linker).
 
+- Having fewer bitcode files can be faster, so you might want to link files into larger files in parallel in your build system (you might already do this if you have logical libraries), and then the final command has fewer things to operate on.
+
+- You don't need to link into a single bitcode file yourself; you can call the final ``emcc`` command that emits JS with a list of files. ``emcc`` can then defer linking and avoid an intermediary step, if possible (this optimization is disabled by LTO and by ``EMCC_DEBUG=2``).
+
 	
 Why does my code run slowly?
 ============================
diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst
index 6291d90ef6531..8f00cb776e83b 100644
--- a/site/source/docs/porting/simd.rst
+++ b/site/source/docs/porting/simd.rst
@@ -17,7 +17,7 @@ There are three different ways to generate code to benefit from SIMD instruction
 
 - Emscripten supports the GCC/Clang compiler specific `SIMD Vector Extensions <https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html>`_. These constructs do not require any changes to the command line build flags, but any code that utilizes the vector built-ins will always unconditionally emit SIMD.js vector instructions.
 
-- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1 and SSE2 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>.
+- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1, SSE2, SSE3 and SSSE3 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>. For SSE3, pass -msse3 and #include <pmmintrin.h>, and for SSSE3, pass -mssse3 and #include <tmmintrin.h>.
 
 These three methods are not mutually exclusive, but may freely be combined.
 
@@ -30,9 +30,9 @@ When porting native SIMD code, it should be noted that because of portability co
 
  - The SIMD types supported by SIMD.js are Float32x4, Int32x4, Uint32x4, Int16x8, Uint16x8, Int8x16 and Uint8x16. In particular, Float64x2 and Int64x2 are currently not supported, however Float64x2 is emulated in software in the current polyfill. 256-bit or wider SIMD types (AVX) are not supported either.
 
- - Even though the full set of SSE1 and SSE2 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h <https://github.com/kripken/emscripten/blob/incoming/system/include/emscripten/xmmintrin.h>`_ and `emmintrin.h <https://github.com/kripken/emscripten/blob/incoming/system/include/emscripten/xmmintrin.h>`_.
+ - Even though the full set of SSE1, SSE2, SSE3 and SSSE3 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h <https://github.com/kripken/emscripten/blob/incoming/system/include/emscripten/xmmintrin.h>`_ and `emmintrin.h <https://github.com/kripken/emscripten/blob/incoming/system/include/emscripten/emmintrin.h>`_.
 
- - Currently the Intel x86 SIMD support is limited to SSE1 and SSE2 instruction sets. The Intel x86 SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported.
+ - Currently the Intel x86 SIMD support is limited to SSE1, SSE2, SSE3 and SSSE3 instruction sets. The Intel x86 SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported.
 
  - SIMD.js does not have control over managing floating point rounding modes or handling denormals.
 
diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js
index d34ff0b6ec2ec..3192491dd5b1a 100644
--- a/src/emrun_postjs.js
+++ b/src/emrun_postjs.js
@@ -44,14 +44,15 @@ if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined'
       post('^pageload^');
     }
   }
-  window.addEventListener('load', emrun_register_handlers);
-}
 
-// POSTs the given binary data represented as a (typed) array data back to the emrun-based web server.
-// To use from C code, call e.g. EM_ASM_({emrun_file_dump("file.dat", HEAPU8.subarray($0, $0 + $1));}, my_data_pointer, my_data_pointer_byte_length);
-function emrun_file_dump(filename, data) {
-  var http = new XMLHttpRequest();
-  Module['print']('Dumping out file "' + filename + '" with ' + data.length + ' bytes of data.');
-  http.open("POST", "stdio.html?file=" + filename, true);
-  http.send(data); // XXX  this does not work in workers, for some odd reason (issue #2681)
+  // POSTs the given binary data, represented as a (typed) array, back to the emrun-based web server.
+  // To use from C code, call e.g. EM_ASM_({emrun_file_dump("file.dat", HEAPU8.subarray($0, $0 + $1));}, my_data_pointer, my_data_pointer_byte_length);
+  function emrun_file_dump(filename, data) {
+    var http = new XMLHttpRequest();
+    Module['print']('Dumping out file "' + filename + '" with ' + data.length + ' bytes of data.');
+    http.open("POST", "stdio.html?file=" + filename, true);
+    http.send(data); // XXX  this does not work in workers, for some odd reason (issue #2681)
+  }
+
+  if (typeof Module !== 'undefined' && typeof document !== 'undefined') emrun_register_handlers();
 }
diff --git a/src/library.js b/src/library.js
index d7cdd540158a6..af0a07cc6a7ca 100644
--- a/src/library.js
+++ b/src/library.js
@@ -494,6 +494,10 @@ LibraryManager.library = {
   exit: function(status) {
     __exit(status);
   },
+  _Exit__deps: ['exit'],
+  _Exit: function(status) {
+    __exit(status);
+  },
 
   _ZSt9terminatev__deps: ['exit'],
   _ZSt9terminatev: function() {
diff --git a/src/library_openal.js b/src/library_openal.js
index b7422adc7f9c0..5edd31810f2a4 100644
--- a/src/library_openal.js
+++ b/src/library_openal.js
@@ -472,7 +472,7 @@ var LibraryOpenAL = {
           // Disconnect from the panner.
           src.gain.disconnect();
 
-          src.gain.connect(AL.currentContext.ctx.destination);
+          src.gain.connect(AL.currentContext.gain);
         }
       } else if (value === 0 /* AL_FALSE */) {
         if (!src.panner) {
@@ -484,7 +484,7 @@ var LibraryOpenAL = {
           panner.rolloffFactor = src.rolloffFactor;
           panner.setPosition(src.position[0], src.position[1], src.position[2]);
           panner.setVelocity(src.velocity[0], src.velocity[1], src.velocity[2]);
-          panner.connect(AL.currentContext.ctx.destination);
+          panner.connect(AL.currentContext.gain);
 
           // Disconnect from the default source.
           src.gain.disconnect();
@@ -860,7 +860,6 @@ var LibraryOpenAL = {
     }
   },
 
-  alSourcePlay__deps: ['setSourceState'],
   alSourcePlay: function(source) {
     if (!AL.currentContext) {
 #if OPENAL_DEBUG
@@ -879,7 +878,6 @@ var LibraryOpenAL = {
     AL.setSourceState(src, 0x1012 /* AL_PLAYING */);
   },
 
-  alSourceStop__deps: ['setSourceState'],
   alSourceStop: function(source) {
     if (!AL.currentContext) {
 #if OPENAL_DEBUG
@@ -898,7 +896,6 @@ var LibraryOpenAL = {
     AL.setSourceState(src, 0x1014 /* AL_STOPPED */);
   },
 
-  alSourceRewind__deps: ['setSourceState'],
   alSourceRewind: function(source) {
     if (!AL.currentContext) {
 #if OPENAL_DEBUG
@@ -920,7 +917,6 @@ var LibraryOpenAL = {
     AL.setSourceState(src, 0x1011 /* AL_INITIAL */);
   },
 
-  alSourcePause__deps: ['setSourceState'],
   alSourcePause: function(source) {
     if (!AL.currentContext) {
 #if OPENAL_DEBUG
@@ -1169,7 +1165,7 @@ var LibraryOpenAL = {
     }
   },
 
-  alGetListenerf: function(pname, values) {
+  alGetListenerf: function(pname, value) {
     if (!AL.currentContext) {
 #if OPENAL_DEBUG
       console.error("alGetListenerf called without a valid context");
@@ -1178,7 +1174,7 @@ var LibraryOpenAL = {
     }
     switch (pname) {
     case 0x100A /* AL_GAIN */:
-      {{{ makeSetValue('value', '0', 'AL.currentContext.gain.gain', 'float') }}}
+      {{{ makeSetValue('value', '0', 'AL.currentContext.gain.gain.value', 'float') }}}
       break;
     default:
 #if OPENAL_DEBUG
@@ -1254,7 +1250,7 @@ var LibraryOpenAL = {
     }
     switch (param) {
     case 0x100A /* AL_GAIN */:
-      AL.currentContext.gain.value = value;
+      AL.currentContext.gain.gain.value = value;
       break;
     default:
 #if OPENAL_DEBUG
diff --git a/src/modules.js b/src/modules.js
index 8f14ab680a337..13efa7217f5bf 100644
--- a/src/modules.js
+++ b/src/modules.js
@@ -164,9 +164,10 @@ var LibraryManager = {
       if (typeof lib[x] === 'string') {
         var target = x;
         while (typeof lib[target] === 'string') {
-          if (lib[target].indexOf('(') >= 0) continue libloop;
+          // ignore code, aliases are just simple names
+          if (lib[target].search(/[({; ]/) >= 0) continue libloop;
+          // ignore trivial pass-throughs to Math.*
           if (lib[target].indexOf('Math_') == 0) continue libloop;
-          if (lib[target].indexOf(';') > 0) continue libloop; // ignore code
           target = lib[target];
         }
         if (lib[target + '__asm']) continue; // This is an alias of an asm library function. Also needs to be fully optimized.
diff --git a/src/preamble.js b/src/preamble.js
index 391ef3d56f6fb..fe8bb74435f85 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -192,7 +192,7 @@ var cwrap, ccall;
   }
 
 #if NO_DYNAMIC_EXECUTION == 0
-  var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;
+  var sourceRegex = /^function\s*[a-zA-Z$_0-9]*\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;
   function parseJSFunc(jsfunc) {
     // Match the body and the return value of a javascript function source
     var parsed = jsfunc.toString().match(sourceRegex).slice(1);
@@ -1826,4 +1826,27 @@ if (!ENVIRONMENT_IS_PTHREAD) addOnPreRun(function() {
 if (!ENVIRONMENT_IS_PTHREAD) addOnPreRun(function() { if (typeof SharedArrayBuffer !== 'undefined') { addRunDependency('pthreads'); PThread.allocateUnusedWorkers({{{PTHREAD_POOL_SIZE}}}, function() { removeRunDependency('pthreads'); }); }});
 #endif
 
+#if ASSERTIONS
+#if NO_FILESYSTEM
+var /* show errors on likely calls to FS when it was not included */ FS = {
+  error: function() {
+    abort('Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. You can force-include filesystem support with  -s FORCE_FILESYSTEM=1');
+  },
+  init: function() { FS.error() },
+  createDataFile: function() { FS.error() },
+  createPreloadedFile: function() { FS.error() },
+  createLazyFile: function() { FS.error() },
+  open: function() { FS.error() },
+  mkdev: function() { FS.error() },
+  registerDevice: function() { FS.error() },
+  analyzePath: function() { FS.error() },
+  loadFilesFromDB: function() { FS.error() },
+
+  ErrnoError: function ErrnoError() { FS.error() },
+};
+Module['FS_createDataFile'] = FS.createDataFile;
+Module['FS_createPreloadedFile'] = FS.createPreloadedFile;
+#endif
+#endif
+
 // === Body ===
diff --git a/src/settings.js b/src/settings.js
index 8eb8a6e77dda8..7d21a993a6b03 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -629,12 +629,6 @@ var WASM_BACKEND = 0; // Whether to use the WebAssembly backend that is in devel
                       // This requires that BINARYEN be set, as we use Binaryen's s2wasm to
                       // translate the backend output.
 
-var WASM = 0; // Older WebAssembly experiment. Compress the asm.js module into an early proposal for WebAssembly,
-              // and ship a decompressor that runs on the client.
-              // Note that wasm loading is asynchronous in the browser, and for that reason we wrap the entire emitted
-              // code in a function - things will not reach the global scope by default. You can access things on the
-              // Module object.
-
 // Ports
 
 var USE_SDL = 1; // Specify the SDL version that is being linked against.
@@ -687,4 +681,14 @@ var PTHREADS_PROFILING = 0; // True when building with --threadprofiler
 
 var MAX_GLOBAL_ALIGN = -1; // received from the backend
 
+// Duplicate function elimination. This coalesces function bodies that are
+// identical, which can happen e.g. if two methods have different C/C++
+// or LLVM types, but end up identical at the asm.js level (all pointers
+// are the same as int32_t in asm.js, for example).
+// This option is quite slow to run, as it processes and hashes all methods
+// in the codebase in multiple passes.
+var ELIMINATE_DUPLICATE_FUNCTIONS = 0; // disabled by default
+var ELIMINATE_DUPLICATE_FUNCTIONS_PASSES = 5;
+var ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS = 0;
+
 // Reserved: variables containing POINTER_MASKING.
diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h
index aecf7d7109877..d216dd5377a32 100644
--- a/system/include/emscripten/emmintrin.h
+++ b/system/include/emscripten/emmintrin.h
@@ -64,6 +64,10 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
 typedef short __v8hi __attribute__((__vector_size__(16)));
 typedef char __v16qi __attribute__((__vector_size__(16)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v16qs __attribute__((__vector_size__(16)));
+
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_add_sd(__m128d __a, __m128d __b)
 {
@@ -1660,9 +1664,6 @@ _mm_cmpeq_epi32(__m128i __a, __m128i __b)
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
 {
-  /* This function always performs a signed comparison, but __v16qi is a char
-     which may be signed or unsigned. */
-  typedef signed char __v16qs __attribute__((__vector_size__(16)));
   return (__m128i)((__v16qs)__a > (__v16qs)__b);
 }
 
diff --git a/system/include/emscripten/pmmintrin.h b/system/include/emscripten/pmmintrin.h
new file mode 100644
index 0000000000000..ffb6c4830e052
--- /dev/null
+++ b/system/include/emscripten/pmmintrin.h
@@ -0,0 +1,156 @@
+/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __PMMINTRIN_H
+#define __PMMINTRIN_H
+
+#include <emmintrin.h>
+
+#ifndef __SSE3__
+#error "SSE3 instruction set not enabled"
+#endif
+
+/* Define the default attributes for the functions in this file. */
+#ifdef __EMSCRIPTEN__
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#else
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_lddqu_si128(__m128i const *__p)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_loadu_si128(__p);
+#else
+  return (__m128i)__builtin_ia32_lddqu((char const *)__p);
+#endif
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_addsub_ps(__m128 __a, __m128 __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_add_ps(__a, _mm_mul_ps(__b, _mm_set_ps(1.f, -1.f, 1.f, -1.f)));
+#else
+  return __builtin_ia32_addsubps(__a, __b);
+#endif
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_hadd_ps(__m128 __a, __m128 __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_add_ps(_mm_shuffle_ps(__a, __b, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(__a, __b, _MM_SHUFFLE(3, 1, 3, 1)));
+#else
+  return __builtin_ia32_haddps(__a, __b);
+#endif
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_hsub_ps(__m128 __a, __m128 __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_sub_ps(_mm_shuffle_ps(__a, __b, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(__a, __b, _MM_SHUFFLE(3, 1, 3, 1)));
+#else
+  return __builtin_ia32_hsubps(__a, __b);
+#endif
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_movehdup_ps(__m128 __a)
+{
+  return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_moveldup_ps(__m128 __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_addsub_pd(__m128d __a, __m128d __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_add_pd(__a, _mm_mul_pd(__b, _mm_set_pd(1.0, -1.0)));
+#else
+  return __builtin_ia32_addsubpd(__a, __b);
+#endif
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_hadd_pd(__m128d __a, __m128d __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_add_pd(_mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(1, 1)));
+#else
+  return __builtin_ia32_haddpd(__a, __b);
+#endif
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_hsub_pd(__m128d __a, __m128d __b)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_sub_pd(_mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(1, 1)));
+#else
+  return __builtin_ia32_hsubpd(__a, __b);
+#endif
+}
+
+#define        _mm_loaddup_pd(dp)        _mm_load1_pd(dp)
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_movedup_pd(__m128d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 0);
+}
+
+#define _MM_DENORMALS_ZERO_ON   (0x0040)
+#define _MM_DENORMALS_ZERO_OFF  (0x0000)
+
+#define _MM_DENORMALS_ZERO_MASK (0x0040)
+
+#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
+#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
+
+#ifndef __EMSCRIPTEN__
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
+{
+  __builtin_ia32_monitor((void *)__p, __extensions, __hints);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mwait(unsigned __extensions, unsigned __hints)
+{
+  __builtin_ia32_mwait(__extensions, __hints);
+}
+
+#endif /* __EMSCRIPTEN__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __PMMINTRIN_H */
diff --git a/system/include/emscripten/smmintrin.h b/system/include/emscripten/smmintrin.h
new file mode 100644
index 0000000000000..10b5af1243d7b
--- /dev/null
+++ b/system/include/emscripten/smmintrin.h
@@ -0,0 +1,628 @@
+/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _SMMINTRIN_H
+#define _SMMINTRIN_H
+
+#include <tmmintrin.h>
+
+/* Define the default attributes for the functions in this file. */
+#ifdef __EMSCRIPTEN__
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#else 
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
+#endif
+
+/* SSE4 Rounding macros. */
+#define _MM_FROUND_TO_NEAREST_INT    0x00
+#define _MM_FROUND_TO_NEG_INF        0x01
+#define _MM_FROUND_TO_POS_INF        0x02
+#define _MM_FROUND_TO_ZERO           0x03
+#define _MM_FROUND_CUR_DIRECTION     0x04
+
+#define _MM_FROUND_RAISE_EXC         0x00
+#define _MM_FROUND_NO_EXC            0x08
+
+#define _MM_FROUND_NINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
+#define _MM_FROUND_FLOOR     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
+#define _MM_FROUND_CEIL      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
+#define _MM_FROUND_TRUNC     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
+#define _MM_FROUND_RINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
+#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_ceil_ps(X)       _mm_round_ps((X), _MM_FROUND_CEIL)
+#define _mm_ceil_pd(X)       _mm_round_pd((X), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(X, Y)    _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(X, Y)    _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(X)      _mm_round_ps((X), _MM_FROUND_FLOOR)
+#define _mm_floor_pd(X)      _mm_round_pd((X), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(X, Y)   _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(X, Y)   _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
+
+#define _mm_round_ps(X, M) __extension__ ({ \
+  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
+
+#define _mm_round_ss(X, Y, M) __extension__ ({ \
+  (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+                                 (__v4sf)(__m128)(Y), (M)); })
+
+#define _mm_round_pd(X, M) __extension__ ({ \
+  (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
+
+#define _mm_round_sd(X, Y, M) __extension__ ({ \
+  (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+                                  (__v2df)(__m128d)(Y), (M)); })
+
+/* SSE4 Packed Blending Intrinsics.  */
+#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
+  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
+                                   (__v2df)(__m128d)(V2), \
+                                   (((M) & 0x01) ? 2 : 0), \
+                                   (((M) & 0x02) ? 3 : 1)); })
+
+#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
+  (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
+                                  (((M) & 0x01) ? 4 : 0), \
+                                  (((M) & 0x02) ? 5 : 1), \
+                                  (((M) & 0x04) ? 6 : 2), \
+                                  (((M) & 0x08) ? 7 : 3)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
+{
+#ifdef __EMSCRIPTEN__
+  /* Per 64-bit lane: take __V2 where the mask lane's sign bit is set, else __V1. */
+  return (__m128d)_mm_xor_si128((__m128i)__V1, _mm_and_si128(_mm_xor_si128((__m128i)__V1, (__m128i)__V2), (__m128i)((__v2di)__M < (__v2di)emscripten_int32x4_splat(0))));
+#else
+  return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, (__v2df)__M);
+#endif
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
+{
+#ifdef __EMSCRIPTEN__
+  /* Per 32-bit lane: take __V2 where the mask lane's sign bit is set, else __V1. */
+  return (__m128)_mm_xor_si128((__m128i)__V1, _mm_and_si128(_mm_xor_si128((__m128i)__V1, (__m128i)__V2), _mm_cmplt_epi32((__m128i)__M, (__m128i)emscripten_int32x4_splat(0))));
+#else
+  return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
+{
+#ifdef __EMSCRIPTEN__
+  /* Per byte: take __V2 where the mask byte's sign bit is set, else __V1. */
+  return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi8(__M, (__m128i)emscripten_int8x16_splat(0))));
+#else
+  return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, (__v16qi)__M);
+#endif
+}
+
+#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
+  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
+                                   (__v8hi)(__m128i)(V2), \
+                                   (((M) & 0x01) ?  8 : 0), \
+                                   (((M) & 0x02) ?  9 : 1), \
+                                   (((M) & 0x04) ? 10 : 2), \
+                                   (((M) & 0x08) ? 11 : 3), \
+                                   (((M) & 0x10) ? 12 : 4), \
+                                   (((M) & 0x20) ? 13 : 5), \
+                                   (((M) & 0x40) ? 14 : 6), \
+                                   (((M) & 0x80) ? 15 : 7)); })
+
+/* SSE4 Dword Multiply Instructions.  */
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_mullo_epi32 (__m128i __V1, __m128i __V2)
+{
+  return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_mul_epi32 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* sign-extend 32-bit lanes 0 and 2 of each operand to 64 bits, then multiply lane-wise */
+  return (__m128i)(__builtin_convertvector(__builtin_shufflevector((__v4si)__V1, (__v4si)__V1, 0, 2), __v2di) * __builtin_convertvector(__builtin_shufflevector((__v4si)__V2, (__v4si)__V2, 0, 2), __v2di));
+#else
+  return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);
+#endif
+}
+
+/* SSE4 Floating Point Dot Product Instructions.  */
+#define _mm_dp_ps(X, Y, M) __extension__ ({ \
+  (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+                               (__v4sf)(__m128)(Y), (M)); })
+
+#define _mm_dp_pd(X, Y, M) __extension__ ({\
+  (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+                                (__v2df)(__m128d)(Y), (M)); })
+
+/* SSE4 Streaming Load Hint Instruction.  */
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_stream_load_si128 (__m128i const *__V)
+{
+#ifdef __EMSCRIPTEN__ /* no non-temporal hint is expressed here: perform an ordinary vector load */
+  return *__V;
+#else
+  return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V);
+#endif
+}
+
+/* SSE4 Packed Integer Min/Max Instructions.  */
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_min_epi8 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi8(__V1, __V2)));
+#else
+  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_max_epi8 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi8(__V1, __V2)));
+#else
+  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_min_epu16 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* bias both operands by 0x8000 so the signed 16-bit compare orders them as unsigned */
+  __m128i __shift = (__m128i)emscripten_int16x8_splat(-32768);
+  return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi16(_mm_sub_epi16(__V1, __shift), _mm_sub_epi16(__V2, __shift))));
+#else
+  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_max_epu16 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* bias both operands by 0x8000 so the signed 16-bit compare orders them as unsigned */
+  __m128i __shift = (__m128i)emscripten_int16x8_splat(-32768);
+  return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi16(_mm_sub_epi16(__V1, __shift), _mm_sub_epi16(__V2, __shift))));
+#else
+  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_min_epi32 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(__V1, __V2)));
+#else
+  return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_max_epi32 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__
+  return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(__V1, __V2)));
+#else
+  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_min_epu32 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* bias both operands by 0x80000000 so the signed 32-bit compare orders them as unsigned */
+  __m128i __shift = (__m128i)emscripten_int32x4_splat((int)0x80000000U);
+  return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(_mm_sub_epi32(__V1, __shift), _mm_sub_epi32(__V2, __shift))));
+#else
+  return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
+#endif
+}
+
+static __inline__  __m128i __DEFAULT_FN_ATTRS
+_mm_max_epu32 (__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* bias both operands by 0x80000000 so the signed 32-bit compare orders them as unsigned */
+  __m128i __shift = (__m128i)emscripten_int32x4_splat((int)0x80000000U);
+  return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(_mm_sub_epi32(__V1, __shift), _mm_sub_epi32(__V2, __shift))));
+#else
+  return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
+#endif
+}
+
+/* SSE4 Insertion and Extraction from XMM Register Instructions.  */
+#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
+#define _mm_extract_ps(X, N) (__extension__                      \
+                              ({ union { int __i; float __f; } __t;  \
+                                 __v4sf __a = (__v4sf)(__m128)(X);       \
+                                 __t.__f = __a[(N) & 3];                 \
+                                 __t.__i;}))
+
+/* Miscellaneous insert and extract macros.  */
+/* Extract a single-precision float from X at index N into D.  */
+#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
+                                                    (D) = __a[N]; }))
+
+/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
+   an index suitable for _mm_insert_ps.  */
+#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))
+
+/* Extract a float from X at index N into the first index of the return.  */
+#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X),   \
+                                             _MM_MK_INSERTPS_NDX((N), 0, 0x0e))
+
+/* Insert int into packed integer array at index.  */
+#define _mm_insert_epi8(X, I, N) (__extension__                           \
+                                  ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+                                     __a[(N) & 15] = (I);                 \
+                                     __a;}))
+#define _mm_insert_epi32(X, I, N) (__extension__                         \
+                                   ({ __v4si __a = (__v4si)(__m128i)(X); \
+                                      __a[(N) & 3] = (I);                \
+                                      __a;}))
+#ifdef __x86_64__
+#define _mm_insert_epi64(X, I, N) (__extension__                         \
+                                   ({ __v2di __a = (__v2di)(__m128i)(X); \
+                                      __a[(N) & 1] = (I);                \
+                                      __a;}))
+#endif /* __x86_64__ */
+
+/* Extract int from packed integer array at index.  This returns the element
+ * as a zero extended value, so it is unsigned.
+ */
+#define _mm_extract_epi8(X, N) (__extension__                           \
+                                ({ __v16qi __a = (__v16qi)(__m128i)(X); \
+                                   (int)(unsigned char) __a[(N) & 15];}))
+#define _mm_extract_epi32(X, N) (__extension__                         \
+                                 ({ __v4si __a = (__v4si)(__m128i)(X); \
+                                    (int)__a[(N) & 3];}))
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) (__extension__                         \
+                                 ({ __v2di __a = (__v2di)(__m128i)(X); \
+                                    (long long)__a[(N) & 1];}))
+#endif /* __x86_64 */
+
+/* SSE4 128-bit Packed Integer Comparisons.  */
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_testz_si128(__m128i __M, __m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* 1 when (__M & __V) has no bit set in any of the 128 bits, else 0 */
+  __v2di __t = (__v2di)_mm_and_si128(__M, __V); return (__t[0] | __t[1]) == 0;
+#else
+  return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
+#endif
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_testc_si128(__m128i __M, __m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* 1 when (~__M & __V) has no bit set; (__M & __V) ^ __V computes ~__M & __V */
+  __v2di __t = (__v2di)_mm_xor_si128(_mm_and_si128(__M, __V), __V); return (__t[0] | __t[1]) == 0;
+#else
+  return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
+#endif
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_testnzc_si128(__m128i __M, __m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* 1 when both (__M & __V) and (~__M & __V) have at least one bit set, else 0 */
+  __v2di __z = (__v2di)_mm_and_si128(__M, __V), __c = (__v2di)_mm_xor_si128(_mm_and_si128(__M, __V), __V); return ((__z[0] | __z[1]) != 0) && ((__c[0] | __c[1]) != 0);
+#else
+  return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
+#endif
+}
+
+#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+/* SSE4 64-bit Packed Integer Comparisons.  */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
+{
+  return (__m128i)((__v2di)__V1 == (__v2di)__V2);
+}
+
+/* SSE4 Packed Integer Sign-Extension.  */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi8_epi16(__m128i __V)
+{
+  /* Always sign-extends; plain char may be unsigned, so use a signed-char vector. */
+  typedef signed char __v16qs __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi8_epi32(__m128i __V)
+{
+  /* Always sign-extends; plain char may be unsigned, so use a signed-char vector. */
+  typedef signed char __v16qs __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi8_epi64(__m128i __V)
+{
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  typedef signed char __v16qs __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi16_epi32(__m128i __V)
+{
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi16_epi64(__m128i __V)
+{
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi32_epi64(__m128i __V)
+{
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);
+}
+
+/* SSE4 Packed Integer Zero-Extension.  */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu8_epi16(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 8 bytes: follow each with one zero byte (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int8x16)__V, (int8x16)emscripten_int8x16_splat(0), 0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7, 16);
+#else
+  return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu8_epi32(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 4 bytes: follow each with three zero bytes (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int8x16)__V, (int8x16)emscripten_int8x16_splat(0), 0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, 3, 16, 16, 16);
+#else
+  return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu8_epi64(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 2 bytes: follow each with seven zero bytes (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int8x16)__V, (int8x16)emscripten_int8x16_splat(0), 0, 16, 16, 16, 16, 16, 16, 16, 1, 16, 16, 16, 16, 16, 16, 16);
+#else
+  return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu16_epi32(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 4 words: follow each with one zero word (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int16x8)__V, (int16x8)emscripten_int16x8_splat(0), 0, 8, 1, 8, 2, 8, 3, 8);
+#else
+  return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu16_epi64(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 2 words: follow each with three zero words (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int16x8)__V, (int16x8)emscripten_int16x8_splat(0), 0, 8, 8, 8, 1, 8, 8, 8);
+#else
+  return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepu32_epi64(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__ /* zero-extend the low 2 dwords: follow each with one zero dword (assumes little-endian lane layout) */
+  return (__m128i)__builtin_shufflevector((int32x4)__V, (int32x4)emscripten_int32x4_splat(0), 0, 4, 1, 4);
+#else
+  return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
+#endif
+}
+
+/* SSE4 Pack with Unsigned Saturation.  */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_packus_epi32(__m128i __V1, __m128i __V2)
+{
+#ifdef __EMSCRIPTEN__ /* clamp every signed 32-bit lane to [0, 65535], then keep the low 16 bits of each: __V1 lanes first, then __V2 */
+  __m128i __lo = (__m128i)emscripten_int32x4_splat(0), __hi = (__m128i)emscripten_int32x4_splat(65535);
+  return (__m128i)__builtin_shufflevector((int16x8)_mm_min_epi32(_mm_max_epi32(__V1, __lo), __hi), (int16x8)_mm_min_epi32(_mm_max_epi32(__V2, __lo), __hi), 0, 2, 4, 6, 8, 10, 12, 14);
+#else
+  return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
+#endif
+}
+/* SSE4 Multiple Packed Sums of Absolute Difference.  */
+#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
+  (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
+                                      (__v16qi)(__m128i)(Y), (M)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_minpos_epu16(__m128i __V)
+{
+#ifdef __EMSCRIPTEN__
+  /* FIXME: no Emscripten implementation yet; control falls off the end of this non-void function, so the result is undefined if it is called. */
+#else
+  return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);
+#endif
+}
+
+/* SSE4.2 definitions; normally in nmmintrin.h, but gcc puts them here, so we do too. */
+#undef __DEFAULT_FN_ATTRS
+#ifdef __EMSCRIPTEN__ /* no x86 __target__ attribute, matching the SSE4.1 attributes defined earlier in this file */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#else
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
+#endif
+
+/* These specify the type of data that we're comparing.  */
+#define _SIDD_UBYTE_OPS                 0x00
+#define _SIDD_UWORD_OPS                 0x01
+#define _SIDD_SBYTE_OPS                 0x02
+#define _SIDD_SWORD_OPS                 0x03
+
+/* These specify the type of comparison operation.  */
+#define _SIDD_CMP_EQUAL_ANY             0x00
+#define _SIDD_CMP_RANGES                0x04
+#define _SIDD_CMP_EQUAL_EACH            0x08
+#define _SIDD_CMP_EQUAL_ORDERED         0x0c
+
+/* These macros specify the polarity of the operation.  */
+#define _SIDD_POSITIVE_POLARITY         0x00
+#define _SIDD_NEGATIVE_POLARITY         0x10
+#define _SIDD_MASKED_POSITIVE_POLARITY  0x20
+#define _SIDD_MASKED_NEGATIVE_POLARITY  0x30
+
+/* These macros are used in _mm_cmpXstri() to specify the return.  */
+#define _SIDD_LEAST_SIGNIFICANT         0x00
+#define _SIDD_MOST_SIGNIFICANT          0x40
+
+/* These macros are used in _mm_cmpXstri() to specify the return.  */
+#define _SIDD_BIT_MASK                  0x00
+#define _SIDD_UNIT_MASK                 0x40
+
+/* SSE4.2 Packed Comparison Intrinsics.  */
+#define _mm_cmpistrm(A, B, M) \
+  (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
+                                       (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistri(A, B, M) \
+  (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
+                                   (__v16qi)(__m128i)(B), (int)(M))
+
+#define _mm_cmpestrm(A, LA, B, LB, M) \
+  (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
+                                       (__v16qi)(__m128i)(B), (int)(LB), \
+                                       (int)(M))
+#define _mm_cmpestri(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
+                                   (__v16qi)(__m128i)(B), (int)(LB), \
+                                   (int)(M))
+
+/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading.  */
+#define _mm_cmpistra(A, B, M) \
+  (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistrc(A, B, M) \
+  (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistro(A, B, M) \
+  (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistrs(A, B, M) \
+  (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
+#define _mm_cmpistrz(A, B, M) \
+  (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
+                                    (__v16qi)(__m128i)(B), (int)(M))
+
+#define _mm_cmpestra(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
+#define _mm_cmpestrc(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
+#define _mm_cmpestro(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
+#define _mm_cmpestrs(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
+#define _mm_cmpestrz(A, LA, B, LB, M) \
+  (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
+                                    (__v16qi)(__m128i)(B), (int)(LB), \
+                                    (int)(M))
+
+/* SSE4.2 Compare Packed Data -- Greater Than.  */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
+{
+  return (__m128i)((__v2di)__V1 > (__v2di)__V2);
+}
+
+/* SSE4.2 Accumulate CRC32.  */
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u8(unsigned int __C, unsigned char __D)
+{
+#ifdef __EMSCRIPTEN__
+  /* Bitwise CRC-32C step (reflected polynomial 0x82F63B78), no pre/post inversion. */
+  __C ^= __D;
+  for (int __i = 0; __i < 8; ++__i)
+    __C = (__C >> 1) ^ (0x82F63B78U & (0U - (__C & 1U)));
+  return __C;
+#else
+  return __builtin_ia32_crc32qi(__C, __D);
+#endif
+}
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u16(unsigned int __C, unsigned short __D)
+{
+#ifdef __EMSCRIPTEN__ /* feed the two bytes through the byte step, least-significant byte first */
+  return _mm_crc32_u8(_mm_crc32_u8(__C, (unsigned char)__D), (unsigned char)(__D >> 8));
+#else
+  return __builtin_ia32_crc32hi(__C, __D);
+#endif
+}
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u32(unsigned int __C, unsigned int __D)
+{
+#ifdef __EMSCRIPTEN__ /* feed the two 16-bit halves through the word step, least-significant half first */
+  return _mm_crc32_u16(_mm_crc32_u16(__C, (unsigned short)__D), (unsigned short)(__D >> 16));
+#else
+  return __builtin_ia32_crc32si(__C, __D);
+#endif
+}
+
+#ifdef __x86_64__ /* x86-64 only, so no Emscripten branch is needed here */
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
+{
+  return __builtin_ia32_crc32di(__C, __D);
+}
+#endif /* __x86_64__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
+#endif /* _SMMINTRIN_H */
diff --git a/system/include/emscripten/tmmintrin.h b/system/include/emscripten/tmmintrin.h
new file mode 100644
index 0000000000000..331ad212e718c
--- /dev/null
+++ b/system/include/emscripten/tmmintrin.h
@@ -0,0 +1,362 @@
+/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __TMMINTRIN_H
+#define __TMMINTRIN_H
+
+#include <pmmintrin.h>
+
+#ifndef __SSSE3__
+#error "SSSE3 instruction set not enabled"
+#endif
+
+/* Define the default attributes for the functions in this file. */
+#ifdef __EMSCRIPTEN__
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#else
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
+#endif
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_abs_pi8(__m64 __a)
+{
+    return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_abs_epi8(__m128i __a)
+{
+#ifdef __EMSCRIPTEN__ /* branch-free abs: __mask is each byte shifted right by 7 (0 or -1, assuming the shift is arithmetic); (a + mask) ^ mask negates lanes where mask is -1 */
+    __m128i __mask = (__m128i)emscripten_int8x16_shiftRightByScalar((int8x16)__a, 7);
+    return _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask);
+#else
+    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_abs_pi16(__m64 __a)
+{
+    return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_abs_epi16(__m128i __a)
+{
+#ifdef __EMSCRIPTEN__ /* branch-free abs: __mask is each word shifted right by 15 (0 or -1, assuming the shift is arithmetic); (a + mask) ^ mask negates lanes where mask is -1 */
+    __m128i __mask = (__m128i)emscripten_int16x8_shiftRightByScalar((int16x8)__a, 15);
+    return _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask);
+#else
+    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_abs_pi32(__m64 __a)
+{
+    return (__m64)__builtin_ia32_pabsd((__v2si)__a);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_abs_epi32(__m128i __a)
+{
+#ifdef __EMSCRIPTEN__ /* branch-free abs: __mask is each dword shifted right by 31 (0 or -1, assuming the shift is arithmetic); (a + mask) ^ mask negates lanes where mask is -1 */
+    __m128i __mask = (__m128i)emscripten_int32x4_shiftRightByScalar((int32x4)__a, 31);
+    return _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask);
+#else
+    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
+#endif
+}
+
+#ifdef __EMSCRIPTEN__ /* shift the 32-byte concatenation __a:__b right by __count bytes; __count is parenthesized and masked to 8 bits everywhere it is used */
+#define _mm_alignr_epi8(__a, __b, __count) \
+    (((((unsigned int)(__count)) & 0xFF) <= 16) \
+    ? (_mm_or_si128(_mm_bslli_si128((__a), 16 - (((unsigned int)(__count)) & 0xFF)), _mm_bsrli_si128((__b), (((unsigned int)(__count)) & 0xFF)))) \
+    : (_mm_bsrli_si128((__a), (((unsigned int)(__count)) & 0xFF) - 16)))
+#else
+#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
+  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+                                     (__v16qi)(__m128i)(b), (n)); })
+#endif
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
+  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hadd_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__
+    return _mm_add_epi16(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15));
+#else
+    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hadd_epi32(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* gather even and odd 32-bit lanes of __a:__b and add them; explicit int32x4 casts keep the 4-lane indices valid */
+    return _mm_add_epi32((__m128i)__builtin_shufflevector((int32x4)__a, (int32x4)__b, 0, 2, 4, 6), (__m128i)__builtin_shufflevector((int32x4)__a, (int32x4)__b, 1, 3, 5, 7));
+#else
+    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hadd_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
+}
+
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hadd_pi32(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hadds_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__
+    return (__m128i)emscripten_int16x8_addSaturate(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15));
+#else
+    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hadds_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hsub_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__
+     return _mm_sub_epi16(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15));
+#else
+    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hsub_epi32(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* gather even and odd 32-bit lanes of __a:__b and subtract odd from even; explicit int32x4 casts keep the 4-lane indices valid */
+    return _mm_sub_epi32((__m128i)__builtin_shufflevector((int32x4)__a, (int32x4)__b, 0, 2, 4, 6), (__m128i)__builtin_shufflevector((int32x4)__a, (int32x4)__b, 1, 3, 5, 7));
+#else
+    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hsub_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
+}
+
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hsub_pi32(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_hsubs_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__
+     return (__m128i)emscripten_int16x8_subSaturate(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15));
+#else
+    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_hsubs_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
+}
+#endif
+
+#ifdef __EMSCRIPTEN__
+static __inline__ short __DEFAULT_FN_ATTRS
+__Saturate_To_Int16(int __x)
+{
+    return __x <= -32768 ? -32768 : (__x >= 32767 ? 32767 : __x);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maddubs_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* scalar fallback: unsigned bytes of __a times signed bytes of __b, adjacent products summed and saturated to 16 bits */
+    union {
+      signed char __x[16]; /* explicitly signed: plain char may be unsigned (e.g. -funsigned-char) */
+      short __s[8];
+      __m128i __m;
+    } __src, __src2, __dst;
+    __src.__m = __a;
+    __src2.__m = __b;
+    for(int __i = 0; __i < 16; __i += 2)
+        __dst.__s[__i>>1] = __Saturate_To_Int16((unsigned char)__src.__x[__i+1] * __src2.__x[__i+1] + (unsigned char)__src.__x[__i] * __src2.__x[__i]);
+    return __dst.__m;
+#else
+    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_maddubs_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mulhrs_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* scalar fallback over the eight 16-bit lanes, using a union to view the vectors as short arrays */
+    union {
+      short __x[8];
+      __m128i __m;
+    } __src, __src2, __dst;
+    __src.__m = __a;
+    __src2.__m = __b;
+    for(int __i = 0; __i < 8; ++__i) /* per lane: product shifted right by 14, plus 1, shifted right by 1, stored back as a short */
+        __dst.__x[__i] = (((__src.__x[__i] * __src2.__x[__i]) >> 14) + 1) >> 1;
+    return __dst.__m;
+#else
+    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_mulhrs_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_shuffle_epi8(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* scalar fallback over the sixteen bytes, using a union to view the vectors as byte arrays */
+    union {
+      unsigned char __x[16];
+      __m128i __m;
+    } __src, __src2, __dst;
+    __src.__m = __a;
+    __src2.__m = __b;
+    for(int __i = 0; __i < 16; ++__i) /* control byte with bit 7 set yields 0; otherwise its low 4 bits select a byte of __a */
+        __dst.__x[__i] = (__src2.__x[__i] & 0x80) ? 0 : __src.__x[__src2.__x[__i]&15];
+    return __dst.__m;
+#else
+    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_shuffle_pi8(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sign_epi8(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* __mask: each byte of __b shifted right by 7 (0 or -1, assuming an arithmetic shift); (a + mask) ^ mask negates those lanes; __zeromask clears lanes where __b == 0 */
+    __m128i __mask = (__m128i)emscripten_int8x16_shiftRightByScalar((int8x16)__b, 7);
+    __m128i __zeromask = (__m128i)emscripten_int8x16_notEqual((int8x16)__b, emscripten_int8x16_splat(0));
+    return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask));
+#else
+    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sign_epi16(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* __mask: each word of __b shifted right by 15 (0 or -1, assuming an arithmetic shift); (a + mask) ^ mask negates those lanes; __zeromask clears lanes where __b == 0 */
+    __m128i __mask = (__m128i)emscripten_int16x8_shiftRightByScalar((int16x8)__b, 15);
+    __m128i __zeromask = (__m128i)emscripten_int16x8_notEqual((int16x8)__b, emscripten_int16x8_splat(0));
+    return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask));
+#else
+    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
+#endif
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sign_epi32(__m128i __a, __m128i __b)
+{
+#ifdef __EMSCRIPTEN__ /* __mask: each dword of __b shifted right by 31 (0 or -1, assuming an arithmetic shift); (a + mask) ^ mask negates those lanes; __zeromask clears lanes where __b == 0 */
+    __m128i __mask = (__m128i)emscripten_int32x4_shiftRightByScalar((int32x4)__b, 31);
+    __m128i __zeromask = (__m128i)emscripten_int32x4_notEqual((int32x4)__b, emscripten_int32x4_splat(0));
+    return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask));
+#else
+    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
+#endif
+}
+
+#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_sign_pi8(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
+}
+
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_sign_pi16(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
+}
+
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+_mm_sign_pi32(__m64 __a, __m64 __b)
+{
+    return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
+}
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __TMMINTRIN_H */
diff --git a/system/include/emscripten/x86intrin.h b/system/include/emscripten/x86intrin.h
new file mode 100644
index 0000000000000..bf18cf56dea6e
--- /dev/null
+++ b/system/include/emscripten/x86intrin.h
@@ -0,0 +1,28 @@
+#ifndef __X86INTRIN_H
+#define __X86INTRIN_H
+
+// x86intrin.h is the standard include-all for all supported intrinsics.
+
+#if __SSE__
+#include <xmmintrin.h>
+#else
+#warning x86intrin.h included without SIMD.js support enabled.
+#endif
+
+#if __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if __SSE3__
+#include <pmmintrin.h>
+#endif
+
+#if __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#if __SSE4_1__
+#include <smmintrin.h>
+#endif
+
+#endif
diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h
index 52641e58a84b9..6266dea13bd97 100644
--- a/system/include/emscripten/xmmintrin.h
+++ b/system/include/emscripten/xmmintrin.h
@@ -125,7 +125,12 @@ _mm_store_ps(float *__p, __m128 __a)
 #define _MM_HINT_T2 1
 #define _MM_HINT_NTA 0
 // No prefetch available, dummy it out.
-#define _mm_prefetch(a, sel) ((void)0)
+static __inline__ void __attribute__((__always_inline__))
+_mm_prefetch(const void *__p, int __i) /* const-qualified so callers holding const pointers (e.g. const char * in C++) still compile */
+{
+  ((void)__p); /* intentionally a no-op: both arguments are only consumed to silence unused-parameter warnings */
+  ((void)__i);
+}
 
 static __inline__ void __attribute__((__always_inline__))
 _mm_sfence(void)
diff --git a/system/lib/libc/musl/arch/emscripten/bits/float.h b/system/lib/libc/musl/arch/emscripten/bits/float.h
index 89e9eb6efcfa5..53ec2d10876e6 100644
--- a/system/lib/libc/musl/arch/emscripten/bits/float.h
+++ b/system/lib/libc/musl/arch/emscripten/bits/float.h
@@ -1,17 +1,17 @@
 #define FLT_ROUNDS 1
-#define FLT_EVAL_METHOD 0
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
 
-#define LDBL_TRUE_MIN 4.9406564584124654e-324
-#define LDBL_MIN 2.2250738585072014e-308
-#define LDBL_MAX 1.7976931348623157e+308
-#define LDBL_EPSILON 2.2204460492503131e-16
+#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
+#define LDBL_MIN __LDBL_MIN__
+#define LDBL_MAX __LDBL_MAX__
+#define LDBL_EPSILON __LDBL_EPSILON__
 
-#define LDBL_MANT_DIG 53
-#define LDBL_MIN_EXP (-1021)
-#define LDBL_MAX_EXP 1024
+#define LDBL_MANT_DIG __LDBL_MANT_DIG__
+#define LDBL_MIN_EXP __LDBL_MIN_EXP__
+#define LDBL_MAX_EXP __LDBL_MAX_EXP__
 
-#define LDBL_DIG 15
-#define LDBL_MIN_10_EXP (-307)
-#define LDBL_MAX_10_EXP 308
+#define LDBL_DIG __LDBL_DIG__
+#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
+#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
 
-#define DECIMAL_DIG 17
+#define DECIMAL_DIG __DECIMAL_DIG__
diff --git a/tests/aniso.c b/tests/aniso.c
index b925149d3d369..1aa9e5f2095fc 100644
--- a/tests/aniso.c
+++ b/tests/aniso.c
@@ -219,6 +219,8 @@ int main(int argc, char *argv[])
     SDL_Quit();
 
     // check for asm compilation bug with aliased functions with different sigs
+
+    glBegin( GL_TRIANGLE_STRIP );
     void (*f)(int, int) = glVertex2i;
     if ((int)f % 16 == 4) f(5, 7);
     void (*g)(int, int) = glVertex3f;
diff --git a/tests/cases/rust_struct.ll b/tests/cases/rust_struct.ll
new file mode 100644
index 0000000000000..9b24b6c439820
--- /dev/null
+++ b/tests/cases/rust_struct.ll
@@ -0,0 +1,39 @@
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+@.str = private unnamed_addr constant [9 x i8] c"*%d,%d*\0A\00" ; [#uses=1]
+
+define {i32, i32} @read_pair({i32, i32}* %ptr) norecurse nounwind readonly uwtable {
+entry:
+  %value = load {i32, i32}, {i32, i32}* %ptr, align 4
+  ret {i32, i32} %value
+}
+
+; [#uses=0]
+define i32 @main() {
+entry:
+  %a = alloca {i32, i32}, align 4
+  %a0 = getelementptr {i32, i32}, {i32, i32}* %a, i32 0, i32 0
+  %a1 = getelementptr {i32, i32}, {i32, i32}* %a, i32 0, i32 1
+  %b = alloca {i32, i32}, align 4
+  %b0 = getelementptr {i32, i32}, {i32, i32}* %b, i32 0, i32 0
+  %b1 = getelementptr {i32, i32}, {i32, i32}* %b, i32 0, i32 1
+  
+  ; Initialize a with {1234, 5678} and b with {0, 0}
+  store i32 1234, i32* %a0, align 4
+  store i32 5678, i32* %a1, align 4
+  store i32 0, i32* %b0, align 4
+  store i32 0, i32* %b1, align 4
+  
+  ; This call should remain in the output.
+  %v = call {i32, i32} @read_pair({i32, i32}* %a)
+  store {i32, i32} %v, {i32, i32}* %b, align 4
+  
+  %b0v = load i32, i32* %b0, align 4
+  %b1v = load i32, i32* %b1, align 4
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %b0v, i32 %b1v) ; [#uses=0 type=i32]
+  ret i32 1
+}
+
+; [#uses=1]
+declare i32 @printf(i8*, ...)
diff --git a/tests/cases/rust_struct.txt b/tests/cases/rust_struct.txt
new file mode 100644
index 0000000000000..e05ddb157f22e
--- /dev/null
+++ b/tests/cases/rust_struct.txt
@@ -0,0 +1 @@
+*1234,5678*
diff --git a/tests/openal_playback.cpp b/tests/openal_playback.cpp
index 46c4f8a365bbf..116ed004d28c0 100644
--- a/tests/openal_playback.cpp
+++ b/tests/openal_playback.cpp
@@ -59,6 +59,16 @@ int main() {
   alListenerfv(AL_VELOCITY, listenerVel);
   alListenerfv(AL_ORIENTATION, listenerOri);
 
+  // check getting and setting global gain
+  ALfloat volume;
+  alGetListenerf(AL_GAIN, &volume);
+  assert(volume == 1.0);
+  alListenerf(AL_GAIN, 0.0);
+  alGetListenerf(AL_GAIN, &volume);
+  assert(volume == 0.0);
+
+  alListenerf(AL_GAIN, 1.0); // reset gain to default
+
   ALuint buffers[1];
 
   alGenBuffers(1, buffers);
diff --git a/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js b/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js
new file mode 100644
index 0000000000000..adab68feb486a
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js
@@ -0,0 +1,14 @@
+// EMSCRIPTEN_START_ASM
+var asm = (function(global, env, buffer) {
+ "use asm";
+ var e = 0;
+ 
+// EMSCRIPTEN_START_FUNCS
+function a() {
+ var c = 0.0;
+ return 0;
+}
+// EMSCRIPTEN_END_FUNCS
+ var f = 0;
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+// EMSCRIPTEN_END_ASM
diff --git a/tests/optimizer/test-function-eliminator-double-parsed-correctly.js b/tests/optimizer/test-function-eliminator-double-parsed-correctly.js
new file mode 100644
index 0000000000000..3ebb47215ab05
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-double-parsed-correctly.js
@@ -0,0 +1,22 @@
+// EMSCRIPTEN_START_ASM
+var asm = (function(global, env, buffer) {
+ "use asm";
+ var e = 0;
+ 
+// EMSCRIPTEN_START_FUNCS
+ function a() {
+  var c = +0;
+  return 0;
+ }
+ function b() {
+  var c = +0;
+  return 0;
+ }
+// EMSCRIPTEN_END_FUNCS
+ var f = 0;
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+// EMSCRIPTEN_END_ASM
+// EMSCRIPTEN_GENERATED_FUNCTIONS
+
+
+
diff --git a/tests/optimizer/test-function-eliminator-replace-array-value-output.js b/tests/optimizer/test-function-eliminator-replace-array-value-output.js
new file mode 100644
index 0000000000000..f320d4589f958
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-array-value-output.js
@@ -0,0 +1,25 @@
+// EMSCRIPTEN_START_ASM
+var asm = (function(global, env, buffer) {
+ "use asm";
+ 
+// EMSCRIPTEN_START_FUNCS
+function d() {
+ a();
+ e();
+ return;
+}
+
+function c() {
+ a();
+ return;
+}
+
+function a() {
+ return 0;
+}
+
+// EMSCRIPTEN_END_FUNCS
+
+ var f = [ a ];
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+// EMSCRIPTEN_END_ASM
diff --git a/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js
new file mode 100644
index 0000000000000..56946b51da1e8
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js
@@ -0,0 +1,32 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  b();
+  
+  // We expect that b gets replaced by a below
+  var f = [b];
+  e();
+
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"b":"a"}
diff --git a/tests/optimizer/test-function-eliminator-replace-array-value.js b/tests/optimizer/test-function-eliminator-replace-array-value.js
new file mode 100644
index 0000000000000..d10d3cf3cd0b4
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-array-value.js
@@ -0,0 +1,23 @@
+// EMSCRIPTEN_START_ASM
+var asm = (function(global, env, buffer) {
+ "use asm";
+// EMSCRIPTEN_START_FUNCS
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  b();
+  e();
+  return;
+ }
+// EMSCRIPTEN_END_FUNCS
+  var f = [ b ];
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+// EMSCRIPTEN_END_ASM
\ No newline at end of file
diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js
new file mode 100644
index 0000000000000..381dbe37a8356
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js
@@ -0,0 +1,22 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  a();
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"d":"c"}
diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-output.js b/tests/optimizer/test-function-eliminator-replace-function-call-output.js
new file mode 100644
index 0000000000000..2ddfffb20732d
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-function-call-output.js
@@ -0,0 +1,14 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  a();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js b/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js
new file mode 100644
index 0000000000000..1d92f637aff9f
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js
@@ -0,0 +1,10 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js
new file mode 100644
index 0000000000000..4d120095a97fe
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js
@@ -0,0 +1,27 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  b();
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"b":"a"}
diff --git a/tests/optimizer/test-function-eliminator-replace-function-call.js b/tests/optimizer/test-function-eliminator-replace-function-call.js
new file mode 100644
index 0000000000000..894ed56f87e32
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-function-call.js
@@ -0,0 +1,20 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  b();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+
+
diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js
new file mode 100644
index 0000000000000..da4ef7be78d80
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js
@@ -0,0 +1,18 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  a();
+  var f = {
+   g: a
+  };
+  e();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js
new file mode 100644
index 0000000000000..1d9147415e286
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js
@@ -0,0 +1,34 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  b();
+  
+  // We expect that b gets replaced by a below
+  var f = {
+    g: b
+  };
+  e();
+
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"b":"a"}
diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js
new file mode 100644
index 0000000000000..6bd51dd9696f5
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js
@@ -0,0 +1,24 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  b();
+  var f = {
+   g: b
+  };
+  e();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+
+
diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value-output.js b/tests/optimizer/test-function-eliminator-replace-variable-value-output.js
new file mode 100644
index 0000000000000..de2ae5a17c94c
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-variable-value-output.js
@@ -0,0 +1,16 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  a();
+  var e = a;
+  e();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js
new file mode 100644
index 0000000000000..34f0292118df7
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js
@@ -0,0 +1,32 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  b();
+  
+  // We expect that b gets replaced by a below
+  var e = b;  
+  e();
+
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"b" : "a"}
diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value.js b/tests/optimizer/test-function-eliminator-replace-variable-value.js
new file mode 100644
index 0000000000000..5eb6664124597
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-replace-variable-value.js
@@ -0,0 +1,22 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  b();
+  var e = b;
+  e();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+
+
diff --git a/tests/optimizer/test-function-eliminator-simple-output.js b/tests/optimizer/test-function-eliminator-simple-output.js
new file mode 100644
index 0000000000000..2fa7c4ecace97
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-simple-output.js
@@ -0,0 +1,6 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-simple-with-hash-info.js b/tests/optimizer/test-function-eliminator-simple-with-hash-info.js
new file mode 100644
index 0000000000000..04a8576894913
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-simple-with-hash-info.js
@@ -0,0 +1,15 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {"b":"a"}
diff --git a/tests/optimizer/test-function-eliminator-simple.js b/tests/optimizer/test-function-eliminator-simple.js
new file mode 100644
index 0000000000000..803047a6cf25b
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-simple.js
@@ -0,0 +1,12 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+
+
diff --git a/tests/optimizer/test-function-eliminator-variable-clash-output.js b/tests/optimizer/test-function-eliminator-variable-clash-output.js
new file mode 100644
index 0000000000000..55c8ef83298dc
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-variable-clash-output.js
@@ -0,0 +1,18 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  var a = 0;
+  b();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
diff --git a/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js b/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js
new file mode 100644
index 0000000000000..f97be66b5da1f
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js
@@ -0,0 +1,31 @@
+var asm = (function(global, env, buffer) {
+"use asm";
+function a()
+{
+return 0;
+}
+
+function b()
+{
+return 0;
+}
+
+function c()
+{
+  a();
+  return;
+}
+
+function d()
+{
+  // Because a is used both as a variable and a function, we will
+  // not use a as a candidate for replacement, nor will we replace
+  // calls to b with a.
+  var a = 0;
+  b();
+  return;
+}
+
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+// {}
diff --git a/tests/optimizer/test-function-eliminator-variable-clash.js b/tests/optimizer/test-function-eliminator-variable-clash.js
new file mode 100644
index 0000000000000..c0f5c17143c45
--- /dev/null
+++ b/tests/optimizer/test-function-eliminator-variable-clash.js
@@ -0,0 +1,21 @@
+var asm = (function(global, env, buffer) {
+ "use asm";
+ function a() {
+  return 0;
+ }
+ function b() {
+  return 0;
+ }
+ function c() {
+  a();
+  return;
+ }
+ function d() {
+  var a = 0;
+  b();
+  return;
+ }
+})(Module.asmGlobalArg, Module.asmLibraryArg, buffer);
+
+
+
diff --git a/tests/parallel_test_core.py b/tests/parallel_test_core.py
index 56520e00017d7..4e06f931d88a8 100755
--- a/tests/parallel_test_core.py
+++ b/tests/parallel_test_core.py
@@ -14,7 +14,16 @@
 assert not os.environ.get('EM_SAVE_DIR'), 'Need separate directories to avoid the parallel tests clashing'
 
 # run slower ones first, to optimize total time
-optimal_order = ['asm2i', 'asm2nn', 'asm3', 'asm2', 'asm2g', 'asm2f', 'asm1', 'default']
+optimal_order = [
+  'asm2i',
+  'asm2nn',
+  'asm3',
+  'asm2',
+  'asm2g',
+  'asm2f',
+  'asm1',
+  'default'
+]
 assert set(optimal_order) == set(test_modes), 'need to update the list of slowest modes'
 
 # set up a background thread to report progress
diff --git a/tests/runner.py b/tests/runner.py
index 03e1ee43a3b03..57094158d36b0 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -96,7 +96,16 @@ def path_from_root(*pathelems):
 
 # Core test runner class, shared between normal tests and benchmarks
 checked_sanity = False
-test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm2i', 'asm2nn']
+test_modes = [
+  'default',
+  'asm1',
+  'asm2',
+  'asm3',
+  'asm2f',
+  'asm2g',
+  'asm2i',
+  'asm2nn'
+]
 test_index = 0
 
 use_all_engines = os.environ.get('EM_ALL_ENGINES') # generally js engines are equivalent, testing 1 is enough. set this
@@ -323,24 +332,23 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a
         assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src)
 
   def validate_asmjs(self, err):
-    if "asm.js type error: 'Int8x16' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Int8x16' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Int8x16 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1136226\n"
-    if "asm.js type error: 'Int16x8' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Int16x8' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Int16x8 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1136226\n"
-    if "asm.js type error: 'Uint8x16' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Uint8x16' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint8x16 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1244117\n"
-    if "asm.js type error: 'Uint16x8' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Uint16x8' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint16x8 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1244117\n"
-    if "asm.js type error: 'Uint32x4' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Uint32x4' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint32x4 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1240796\n"
-    if "asm.js type error: 'Float64x2' is not a standard SIMD type" in err:
-      err = err.replace("asm.js type error: 'Float64x2' is not a standard SIMD type", "")
-      print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Float64x2 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1124205\n"
+    m = re.search("asm.js type error: '(\w+)' is not a (standard|supported) SIMD type", err)
+    if m:
+      # Bug numbers for missing SIMD types:
+      bugs = {
+        'Int8x16'  : 1136226,
+        'Int16x8'  : 1136226,
+        'Uint8x16' : 1244117,
+        'Uint16x8' : 1244117,
+        'Uint32x4' : 1240796,
+        'Float64x2': 1124205,
+      }
+      simd = m.group(1)
+      if simd in bugs:
+        print >> sys.stderr, ("\nWARNING: ignoring asm.js type error from {} due to implementation not yet available in SpiderMonkey." +
+            " See https://bugzilla.mozilla.org/show_bug.cgi?id={}\n").format(simd, bugs[simd])
+        err = err.replace(m.group(0), '')
+
     if 'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err:
       print >> sys.stderr, "[was asm.js'ified]"
     elif 'asm.js' in err: # if no asm.js error, then not an odin build
@@ -360,6 +368,22 @@ def get_func(self, src, name):
       t += 1
       assert t < len(src)
 
+  def count_funcs(self, javascript_file):
+    num_funcs = 0
+    start_tok = "// EMSCRIPTEN_START_FUNCS"
+    end_tok = "// EMSCRIPTEN_END_FUNCS"
+    start_off = 0
+    end_off = 0
+
+    with open (javascript_file, 'rt') as fin:
+      blob = "".join(fin.readlines())
+      start_off = blob.find(start_tok) + len(start_tok)
+      end_off = blob.find(end_tok)
+      asm_chunk = blob[start_off:end_off]
+      num_funcs = asm_chunk.count('function ')
+
+    return num_funcs
+
   def run_generated_code(self, engine, filename, args=[], check_timeout=True, output_nicerizer=None, assert_returncode=0):
     stdout = os.path.join(self.get_dir(), 'stdout') # use files, as PIPE can get too full and hang us
     stderr = os.path.join(self.get_dir(), 'stderr')
@@ -792,16 +816,15 @@ def reftest(self, expected):
         img.src = '%s';
       };
       Module['postRun'] = doReftest;
-      Module['preRun'].push(function() {
-        setTimeout(doReftest, 5000); // if run() throws an exception and postRun is not called, this will kick in
-      });
 
       if (typeof WebGLClient !== 'undefined') {
         // trigger reftest from RAF as well, needed for workers where there is no pre|postRun on the main thread
         var realRAF = window.requestAnimationFrame;
         window.requestAnimationFrame = function(func) {
-          realRAF(func);
-          setTimeout(doReftest, 5000);
+          realRAF(function() {
+            func();
+            realRAF(doReftest);
+          });
         };
 
         // trigger reftest from canvas render too, for workers not doing GL
@@ -809,7 +832,7 @@ def reftest(self, expected):
         worker.onmessage = function(event) {
           realWOM(event);
           if (event.data.target === 'canvas' && event.data.op === 'render') {
-            setTimeout(doReftest, 5000);
+            realRAF(doReftest);
           }
         };
       }
diff --git a/tests/sdl_wm_togglefullscreen.c b/tests/sdl_wm_togglefullscreen.c
index c76ced76dc3f0..3758019a1be7a 100644
--- a/tests/sdl_wm_togglefullscreen.c
+++ b/tests/sdl_wm_togglefullscreen.c
@@ -12,6 +12,8 @@ int inFullscreen = 0;
 
 int wasFullscreen = 0;
 
+int finished = 0;
+
 void render() {
   int width, height, isfs;
   emscripten_get_canvas_size(&width, &height, &isfs);
@@ -21,6 +23,9 @@ void render() {
 
 void mainloop() {
   render();
+
+  if (finished) return;
+
   SDL_Event event;
   int isInFullscreen = EM_ASM_INT_V(return !!(document.fullscreenElement || document.mozFullScreenElement || document.webkitFullscreenElement || document.msFullscreenElement));
   if (isInFullscreen && !wasFullscreen) {
@@ -35,7 +40,7 @@ void mainloop() {
     REPORT_RESULT();
 #endif
     wasFullscreen = isInFullscreen;
-    emscripten_cancel_main_loop();
+    finished = 1;
     return;
   }
 
@@ -55,7 +60,7 @@ void mainloop() {
 #ifdef REPORT_RESULT
           REPORT_RESULT();
 #endif
-          emscripten_cancel_main_loop();
+          finished = 1;
           return;
         } else {
           printf("Entering fullscreen...\n");
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index c405d706550fa..38d1e55fb0732 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -111,7 +111,7 @@ def process(filename):
                     '-O3', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0',
                     '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
                     '-s', 'TOTAL_MEMORY=128*1024*1024',
-                    '-s', 'NO_EXIT_RUNTIME=1', '-s', 'EXPORTED_RUNTIME_METHODS=[]',
+                    '-s', 'NO_EXIT_RUNTIME=1',
                     #'--profiling',
                     #'--closure', '1',
                     '-o', final] + shared_args + emcc_args + self.extra_args, stdout=PIPE, stderr=PIPE, env=self.env).communicate()
diff --git a/tests/test_browser.py b/tests/test_browser.py
index 044fe57c95e0f..c0d509291a5bb 100644
--- a/tests/test_browser.py
+++ b/tests/test_browser.py
@@ -108,7 +108,7 @@ def test_emscripten_log(self):
     src = os.path.join(self.get_dir(), 'src.cpp')
     open(src, 'w').write(self.with_report_result(open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read()))
 
-    Popen([PYTHON, EMCC, src, '--pre-js', path_from_root('src', 'emscripten-source-map.min.js'), '-g', '-o', 'page.html']).communicate()
+    Popen([PYTHON, EMCC, src, '--pre-js', path_from_root('src', 'emscripten-source-map.min.js'), '-g', '-o', 'page.html', '-s', 'DEMANGLE_SUPPORT=1']).communicate()
     self.run_browser('page.html', None, '/report_result?1')
   
   def build_native_lzma(self):
@@ -570,18 +570,22 @@ def test_glgears_proxy(self):
 
     original = open('test.js').read()
 
-    def copy(to, js_mod):
-      open(to + '.html', 'w').write(open('test.html').read().replace('test.js', to + '.js'))
+    def copy(to, js_mod, html_mod = lambda x: x):
+      open(to + '.html', 'w').write(html_mod(open('test.html').read().replace('test.js', to + '.js')))
       open(to + '.js', 'w').write(js_mod(open('test.js').read()))
 
     # run with noProxy, but make main thread fail
-    copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'))
+    copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'),
+                lambda original: original.replace('function doReftest() {', 'function doReftest() { return; ')) # don't reftest on main thread, it would race
     self.run_browser('two.html?noProxy', None, ['/report_result?999'])
+    copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'))
     self.run_browser('two.html', None, ['/report_result?0']) # this is still cool
 
     # run without noProxy, so proxy, but make worker fail
-    copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'))
+    copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'),
+                lambda original: original.replace('function doReftest() {', 'function doReftest() { return; ')) # don't reftest on main thread, it would race
     self.run_browser('three.html', None, ['/report_result?999'])
+    copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'))
     self.run_browser('three.html?noProxy', None, ['/report_result?0']) # this is still cool
 
   def test_glgears_proxy_jstarget(self):
@@ -1010,7 +1014,7 @@ def test_file_db(self):
     open('moar.txt', 'w').write(secret)
     self.btest('file_db.cpp', '1', args=['--preload-file', 'moar.txt', '-DFIRST'])
     shutil.copyfile('test.html', 'first.html')
-    self.btest('file_db.cpp', secret)
+    self.btest('file_db.cpp', secret, args=['-s', 'FORCE_FILESYSTEM=1'])
     shutil.copyfile('test.html', 'second.html')
     open('moar.txt', 'w').write('aliantha')
     self.btest('file_db.cpp', secret, args=['--preload-file', 'moar.txt']) # even with a file there, we load over it
@@ -1349,7 +1353,7 @@ def test_chunked_synchronous_xhr(self):
       time.sleep(2)
 
   def test_glgears(self):
-    self.btest('hello_world_gles.c', reference='gears.png', reference_slack=2,
+    self.btest('hello_world_gles.c', reference='gears.png', reference_slack=3,
         args=['-DHAVE_BUILTIN_SINCOS'], outfile='something.html',
         message='You should see animating gears.')
 
@@ -2158,8 +2162,8 @@ def test_locate_file(self):
       </body>
     ''')
 
-    def in_html(expected):
-      Popen([PYTHON, EMCC, 'src.cpp', '-O2', '-g', '--shell-file', 'shell.html', '--pre-js', 'data.js', '-o', 'page.html']).communicate()
+    def in_html(expected, args=[]):
+      Popen([PYTHON, EMCC, 'src.cpp', '-O2', '-g', '--shell-file', 'shell.html', '--pre-js', 'data.js', '-o', 'page.html'] + args).communicate()
       self.run_browser('page.html', None, '/report_result?' + expected)
 
     in_html('1')
@@ -2179,7 +2183,7 @@ def in_html(expected):
 }
     '''))
 
-    in_html('200')
+    in_html('200', ['-s', 'FORCE_FILESYSTEM=1'])
 
   def test_glfw3(self):
     self.btest(path_from_root('tests', 'glfw3.c'), args=['-s', 'LEGACY_GL_EMULATION=1', '-s', 'USE_GLFW=3'], expected='1')
@@ -2743,6 +2747,14 @@ def test_memory_growth_during_startup(self):
 
   # pthreads tests
 
+  def prep_no_SAB(self):  # writes html.html: src/shell_minimal.html plus a script that sets SharedArrayBuffer and Atomics to undefined
+    open('html.html', 'w').write(open(path_from_root('src', 'shell_minimal.html')).read().replace('''<body>''', '''<body>
+      <script>
+        SharedArrayBuffer = undefined;
+        Atomics = undefined;
+      </script>
+    '''))
+
   # Test that the emscripten_ atomics api functions work.
   def test_pthread_atomics(self):
     self.btest(path_from_root('tests', 'pthread', 'test_pthread_atomics.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=8'], timeout=120) # extra time on first test, to be sure to build all libraries
@@ -2795,6 +2807,10 @@ def test_pthread_create(self):
         print str(opt) + ' ' + str(pthreads)
         self.btest(path_from_root('tests', 'pthread', 'test_pthread_create.cpp'), expected='0', args=opt + pthreads + ['-s', 'PTHREAD_POOL_SIZE=8'], timeout=30)
 
+        if 'USE_PTHREADS=2' in pthreads:
+          self.prep_no_SAB()
+          self.btest(path_from_root('tests', 'pthread', 'test_pthread_create.cpp'), expected='0', args=opt + pthreads + ['-s', 'PTHREAD_POOL_SIZE=8', '--shell-file', 'html.html'], timeout=30)
+
   # Test that a pthread can spawn another pthread of its own.
   def test_pthread_create_pthread(self):
     self.btest(path_from_root('tests', 'pthread', 'test_pthread_create_pthread.cpp'), expected='1', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=2', '-s', 'NO_EXIT_RUNTIME=1'], timeout=30)
@@ -2870,10 +2886,16 @@ def test_pthread_iostream(self):
   def test_pthread_setspecific_mainthread(self):
     self.btest(path_from_root('tests', 'pthread', 'test_pthread_setspecific_mainthread.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm'], timeout=30)
 
+    self.prep_no_SAB()
+    self.btest(path_from_root('tests', 'pthread', 'test_pthread_setspecific_mainthread.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '--shell-file', 'html.html'], timeout=30)
+
   # Test the -s PTHREAD_HINT_NUM_CORES=x command line variable.
   def test_pthread_num_logical_cores(self):
     self.btest(path_from_root('tests', 'pthread', 'test_pthread_num_logical_cores.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_HINT_NUM_CORES=2'], timeout=30)
 
+    self.prep_no_SAB()
+    self.btest(path_from_root('tests', 'pthread', 'test_pthread_num_logical_cores.cpp'), expected='0', args=['-O3', '-g', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_HINT_NUM_CORES=2', '--shell-file', 'html.html'], timeout=30)
+
   # Test that pthreads have access to filesystem.
   def test_pthread_file_io(self):
     self.btest(path_from_root('tests', 'pthread', 'test_pthread_file_io.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=1'], timeout=30)
@@ -2957,67 +2979,6 @@ def test_meminit_big(self):
     self.btest(d, expected='0', args=args + ["--closure", "0", "-g"])
     self.btest(d, expected='0', args=args + ["--closure", "1"])
 
-  def test_wasm_polyfill_prototype(self):
-    self.clear()
-    open('main.cpp', 'w').write(self.with_report_result(r'''
-      #include <iostream>
-      int main() {
-        std::cout << "Hello!\n";
-        int result = 7;
-        REPORT_RESULT();
-        return 0;
-      }
-    '''))
-    def separate():
-      print '*** verify that running the wasmator after emcc works'
-      Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html']).communicate()
-      subprocess.check_call([PYTHON, path_from_root('third_party', 'wasm-polyfill', 'wasmator.py'), 'test.o.js', 'test.o.wasm', 'Module'])
-    def together():
-      print '*** verify that running the wasmator using  emcc -s WASM=1  works'
-      Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html', '-s', 'WASM=1']).communicate()
-    def together_worker():
-      print '*** verify that running the wasmator using  emcc -s WASM=1  works, running in a worker'
-      Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html', '-s', 'WASM=1', '--proxy-to-worker']).communicate()
-    for build, check_error in [
-      (separate,        True),
-      (together,        True),
-      (together_worker, False) # onerror does not work in workers
-    ]:
-      build()
-      src = open('test.o.js').read()
-      open('test.o.js', 'w').write('''
-        onerror = function() {
-          Module.print('fail!');
-          var xhr = new XMLHttpRequest();
-          xhr.open('GET', 'http://localhost:8888/report_result?99');
-          xhr.onload = function() {
-            console.log('close!');
-            window.close();
-          };
-          setTimeout(xhr.onload, 2000); 
-          xhr.send();
-        };
-
-      ''' + src)
-      print 'browser'
-      self.run_browser('test.o.html', None, '/report_result?7')
-      print 'shell'
-      self.do_run('', 'Hello!', no_build=True, basename='test') # test in the shell too
-      assert os.path.exists('test.o.wasm')
-      os.unlink('test.o.wasm')
-      if check_error:
-        print 'error verify'
-        self.run_browser('test.o.html', None, '/report_result?99') # without the wasm, we failz
-        print 'shell'
-        ok = False
-        try:
-          self.do_run('', 'Hello!', no_build=True, basename='test') # test in the shell too
-          ok = True
-        except:
-          pass
-        assert not ok
-      os.unlink('test.o.js')
-
   def test_canvas_style_proxy(self):
     self.btest('canvas_style_proxy.c', expected='1', args=['--proxy-to-worker', '--shell-file', path_from_root('tests/canvas_style_proxy_shell.html'), '--pre-js', path_from_root('tests/canvas_style_proxy_pre.js')])
 
diff --git a/tests/test_core.py b/tests/test_core.py
index 0cf3327dc275f..67c0ad0463636 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -5840,6 +5840,49 @@ def test_sse2_full(self):
       self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-msse2'] + args
       self.do_run(open(path_from_root('tests', 'test_sse2_full.cpp'), 'r').read(), native_result)
 
+  # Shared driver for the SSE3 / SSSE3 / SSE4.1 "full API" tests below.
+  #   name: basename of tests/<name>.cpp; also used as the native executable name.
+  #   flag: instruction-set switch (e.g. '-msse3') given to both native clang and emcc.
+  # The source is built and run natively first; that stdout is the expected output
+  # for the emscripten builds.
+  def run_sse_full_test(self, name, flag):
+    source = path_from_root('tests', name + '.cpp')
+    args = []
+    # Unoptimized runs define _DEBUG for the test source as well.
+    if '-O0' in self.emcc_args: args += ['-D_DEBUG=1']
+    # Fail right away if the native reference build breaks, rather than running a stale or missing binary.
+    native_cmd = [CLANG, source, '-o', name, '-D_CRT_SECURE_NO_WARNINGS=1', flag] + args + get_clang_native_args()
+    native_build = Popen(native_cmd, env=get_clang_native_env(), stdout=PIPE)
+    native_build.communicate()
+    assert native_build.returncode == 0, 'native build of ' + name + ' failed'
+    native_result, err = Popen('./' + name, stdout=PIPE).communicate()
+    native_result = native_result.replace('\r\n', '\n') # Windows line endings fix
+
+    with open(source, 'r') as f:
+      source_text = f.read()
+
+    Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround
+    orig_args = self.emcc_args
+    # Run twice: once with the default settings, once with -s SIMD=1.
+    for mode in [[], ['-s', 'SIMD=1']]:
+      self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), flag] + args
+      self.do_run(source_text, native_result)
+
+  # Tests the full SSE3 API.
+  @SIMD
+  def test_sse3_full(self):
+    self.run_sse_full_test('test_sse3_full', '-msse3')
+
+  # Tests the full SSSE3 API.
+  @SIMD
+  def test_ssse3_full(self):
+    self.run_sse_full_test('test_ssse3_full', '-mssse3')
+
+  # Tests the full SSE4.1 API.
+  @SIMD
+  def test_sse4_1_full(self):
+    self.run_sse_full_test('test_sse4_1_full', '-msse4.1')
+
   @SIMD
   def test_simd(self):
     test_path = path_from_root('tests', 'core', 'test_simd')
@@ -6164,21 +6207,23 @@ def test():
 
   def test_poppler(self):
     if WINDOWS: return self.skip('test_poppler depends on freetype, which uses a ./configure script to build and therefore currently only runs on Linux and OS X.')
-    Settings.NO_EXIT_RUNTIME = 1
 
-    Building.COMPILER_TEST_OPTS += [
-      '-I' + path_from_root('tests', 'freetype', 'include'),
-      '-I' + path_from_root('tests', 'poppler', 'include')
-    ]
+    def test():
+      Settings.NO_EXIT_RUNTIME = 1
 
-    Settings.INVOKE_RUN = 0 # We append code that does run() ourselves
+      Building.COMPILER_TEST_OPTS += [
+        '-I' + path_from_root('tests', 'freetype', 'include'),
+        '-I' + path_from_root('tests', 'poppler', 'include')
+      ]
 
-    # See post(), below
-    input_file = open(os.path.join(self.get_dir(), 'paper.pdf.js'), 'w')
-    input_file.write(str(map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read())))
-    input_file.close()
+      Settings.INVOKE_RUN = 0 # We append code that does run() ourselves
 
-    post = '''
+      # See post(), below
+      input_file = open(os.path.join(self.get_dir(), 'paper.pdf.js'), 'w')
+      input_file.write(str(map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read())))
+      input_file.close()
+
+      post = '''
 def process(filename):
   # To avoid loading this large file to memory and altering it, we simply append to the end
   src = open(filename, 'a')
@@ -6191,28 +6236,44 @@ def process(filename):
   )
   src.close()
 '''
+
+      #fontconfig = self.get_library('fontconfig', [os.path.join('src', '.libs', 'libfontconfig.a')]) # Used in file, but not needed, mostly
 
-    #fontconfig = self.get_library('fontconfig', [os.path.join('src', '.libs', 'libfontconfig.a')]) # Used in file, but not needed, mostly
+      freetype = self.get_freetype()
 
-    freetype = self.get_freetype()
+      poppler = self.get_library('poppler',
+                                 [os.path.join('utils', 'pdftoppm.o'),
+                                  os.path.join('utils', 'parseargs.o'),
+                                  os.path.join('poppler', '.libs', 'libpoppler.a')],
+                                 env_init={ 'FONTCONFIG_CFLAGS': ' ', 'FONTCONFIG_LIBS': ' ' },
+                                 configure_args=['--disable-libjpeg', '--disable-libpng', '--disable-poppler-qt', '--disable-poppler-qt4', '--disable-cms', '--disable-cairo-output', '--disable-abiword-output', '--enable-shared=no'])
 
-    poppler = self.get_library('poppler',
-                               [os.path.join('utils', 'pdftoppm.o'),
-                                os.path.join('utils', 'parseargs.o'),
-                                os.path.join('poppler', '.libs', 'libpoppler.a')],
-                               env_init={ 'FONTCONFIG_CFLAGS': ' ', 'FONTCONFIG_LIBS': ' ' },
-                               configure_args=['--disable-libjpeg', '--disable-libpng', '--disable-poppler-qt', '--disable-poppler-qt4', '--disable-cms', '--disable-cairo-output', '--disable-abiword-output', '--enable-shared=no'])
+      # Combine libraries
 
-    # Combine libraries
+      combined = os.path.join(self.get_dir(), 'poppler-combined.bc')
+      Building.link(poppler + freetype, combined)
 
-    combined = os.path.join(self.get_dir(), 'poppler-combined.bc')
-    Building.link(poppler + freetype, combined)
+      self.do_ll_run(combined,
+                     map(ord, open(path_from_root('tests', 'poppler', 'ref.ppm'), 'r').read()).__str__().replace(' ', ''),
+                     args='-scale-to 512 paper.pdf filename'.split(' '),
+                     post_build=post)
+                     #, build_ll_hook=self.do_autodebug)
 
-    self.do_ll_run(combined,
-                   map(ord, open(path_from_root('tests', 'poppler', 'ref.ppm'), 'r').read()).__str__().replace(' ', ''),
-                   args='-scale-to 512 paper.pdf filename'.split(' '),
-                   post_build=post)
-                   #, build_ll_hook=self.do_autodebug)
+    test()
+    num_original_funcs = self.count_funcs('src.cpp.o.js')
+
+    # Run with duplicate function elimination turned on
+    dfe_supported_opt_levels = ['-O2', '-O3', '-Oz', '-Os']
+
+    for opt_level in dfe_supported_opt_levels:
+      if opt_level in self.emcc_args:
+        print >> sys.stderr, "Testing poppler with ELIMINATE_DUPLICATE_FUNCTIONS set to 1"
+        Settings.ELIMINATE_DUPLICATE_FUNCTIONS = 1
+        test()
+
+        # Make sure that DFE ends up eliminating more than 200 functions
+        assert (num_original_funcs - self.count_funcs('src.cpp.o.js')) > 200
+        break
 
   def test_openjpeg(self):
     Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) # remove -g, so we have one test without it by default
@@ -7534,7 +7595,12 @@ def test_exit_status(self):
       int main() {
         atexit(cleanup); // this atexit should still be called
         printf("hello, world!\n");
-        exit(118); // Unusual exit status to make sure it's working!
+        // Unusual exit status to make sure it's working!
+        if (CAPITAL_EXIT) {
+          _Exit(118);
+        } else {
+          exit(118);
+        }
       }
     '''
     open('post.js', 'w').write('''
@@ -7544,7 +7610,8 @@ def test_exit_status(self):
       Module.callMain();
     ''')
     self.emcc_args += ['-s', 'INVOKE_RUN=0', '--post-js', 'post.js']
-    self.do_run(src, 'hello, world!\ncleanup\nI see exit status: 118')
+    self.do_run(src.replace('CAPITAL_EXIT', '0'), 'hello, world!\ncleanup\nI see exit status: 118')
+    self.do_run(src.replace('CAPITAL_EXIT', '1'), 'hello, world!\ncleanup\nI see exit status: 118')
 
   def test_noexitruntime(self):
     src = r'''
@@ -7895,7 +7962,6 @@ def setUp(self):
 asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"])
 asm2i = make_run("asm2i", compiler=CLANG, emcc_args=["-O2", '-s', 'EMTERPRETIFY=1'])
 #asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"])
-#asm2w = make_run("asm2w", compiler=CLANG, emcc_args=["-O2", "-s", "WASM=1"])
 #binaryen = make_run("binaryen", compiler=CLANG, emcc_args=["-s", "BINARYEN='..path..'"])
 #normalyen = make_run("normalyen", compiler=CLANG, emcc_args=['-s', 'GLOBAL_BASE=1024']) # useful comparison to binaryen
 
diff --git a/tests/test_html5_mouse.c b/tests/test_html5_mouse.c
index 462cb9acda4de..01f54de3ef113 100644
--- a/tests/test_html5_mouse.c
+++ b/tests/test_html5_mouse.c
@@ -69,12 +69,9 @@ EM_BOOL mouse_callback(int eventType, const EmscriptenMouseEvent *e, void *userD
 
   if (e->screenX != 0 && e->screenY != 0 && e->clientX != 0 && e->clientY != 0 && e->canvasX != 0 && e->canvasY != 0 && e->targetX != 0 && e->targetY != 0)
   {
-    if (e->buttons != 0)
-    {
-      if (eventType == EMSCRIPTEN_EVENT_CLICK) gotClick = 1;
-      if (eventType == EMSCRIPTEN_EVENT_MOUSEDOWN) gotMouseDown = 1;
-      if (eventType == EMSCRIPTEN_EVENT_DBLCLICK) gotDblClick = 1;
-    }
+    if (eventType == EMSCRIPTEN_EVENT_CLICK) gotClick = 1;
+    if (eventType == EMSCRIPTEN_EVENT_MOUSEDOWN && e->buttons != 0) gotMouseDown = 1;
+    if (eventType == EMSCRIPTEN_EVENT_DBLCLICK) gotDblClick = 1;
     if (eventType == EMSCRIPTEN_EVENT_MOUSEUP) gotMouseUp = 1;
     if (eventType == EMSCRIPTEN_EVENT_MOUSEMOVE && (e->movementX != 0 || e->movementY != 0)) gotMouseMove = 1;
   }
@@ -107,6 +104,10 @@ EM_BOOL wheel_callback(int eventType, const EmscriptenWheelEvent *e, void *userD
 
 int main()
 {
+  // Make the canvas area stand out from the background.
+  emscripten_set_canvas_size(400, 300);
+  EM_ASM(Module['canvas'].style.backgroundColor = 'black';);
+
   EMSCRIPTEN_RESULT ret = emscripten_set_click_callback(0, 0, 1, mouse_callback);
   TEST_RESULT(emscripten_set_click_callback);
   ret = emscripten_set_mousedown_callback(0, 0, 1, mouse_callback);
@@ -142,10 +143,10 @@ int main()
       for(var d in data) event[d] = data[d];
       window.dispatchEvent(event);
     }
-    sendEvent('click', { screenX: -500000, screenY: -500000, clientX: -500000, clientY: -500000, button: 0, buttons: 1 }); // Send a dummy event that should not be received.
+    sendEvent('click', { screenX: -500000, screenY: -500000, clientX: -500000, clientY: -500000, button: 0, buttons: 0 }); // Send a dummy event that should not be received.
     sendEvent('mousedown', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 1 });
     sendEvent('mouseup', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0 });
-    sendEvent('dblclick', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 1 });
+    sendEvent('dblclick', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0 });
     sendEvent('mousemove', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'movementX': 1, 'movementY': 1 });
     sendEvent('wheel', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'deltaX': 1, 'deltaY': 1, 'deltaZ': 1, 'deltaMode': 1 });
     sendEvent('mousewheel', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'wheelDeltaX': 1, 'wheelDeltaY': 1 });
diff --git a/tests/test_other.py b/tests/test_other.py
index 63e31595b47d3..c75d9c47ca7ea 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -1,3 +1,5 @@
+# coding=utf-8
+
 import multiprocessing, os, pipes, re, shutil, subprocess, sys
 import glob
 import tools.shared
@@ -2161,6 +2163,24 @@ def clean(txt):
     except ValueError:
       assert False
 
+  def test_file_packager_unicode(self):  # the file packager must handle a non-ASCII path silently and mention it in its output
+    unicode_name = 'unicode…☃'
+    if not os.path.exists(unicode_name):
+      try:
+        os.mkdir(unicode_name)
+      except (OSError, UnicodeError):  # only "cannot create this name here" means skip; anything else is a real error
+        print "we failed to even create a unicode dir, so on this OS, we can't test this"
+        return
+    full = os.path.join(unicode_name, 'data.txt')
+    with open(full, 'w') as data_file:  # make sure the data is flushed and closed before the packager reads it
+      data_file.write('data')
+    proc = Popen([PYTHON, FILE_PACKAGER, 'test.data', '--preload', full], stdout=PIPE, stderr=PIPE)
+    out, err = proc.communicate()
+    assert proc.returncode == 0, err
+    assert len(out) > 0, err
+    assert len(err) == 0, err
+    assert unicode_name in out, out
+
   def test_crunch(self):
     try:
       print 'Crunch is located at ' + CRUNCH
@@ -2520,36 +2540,47 @@ def test_module_print(self):
     assert r'<{(123456789)}>' in output, output
 
   def test_precompiled_headers(self):
-    self.clear()
+    for suffix in ['gch', 'pch']:
+      print suffix
+      self.clear()
 
-    open('header.h', 'w').write('#define X 5\n')
-    Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-c']).communicate()
-    assert os.path.exists('header.h.gch')
+      open('header.h', 'w').write('#define X 5\n')
+      Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-c']).communicate()
+      assert os.path.exists('header.h.gch') # default output is gch
+      if suffix != 'gch':
+        Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'header.h.' + suffix]).communicate()
+        assert open('header.h.gch').read() == open('header.h.' + suffix).read()
 
-    open('src.cpp', 'w').write(r'''
+      open('src.cpp', 'w').write(r'''
 #include <stdio.h>
 int main() {
   printf("|%d|\n", X);
   return 0;
 }
 ''')
-    Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h']).communicate()
-
-    output = run_js(self.in_dir('a.out.js'), stderr=PIPE, full_output=True, engine=NODE_JS)
-    assert '|5|' in output, output
-
-    # also verify that the gch is actually used
-    err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate()
-    self.assertTextDataContained('*** PCH/Modules Loaded:\nModule: header.h.gch', err[1])
-    # and sanity check it is not mentioned when not
-    try_delete('header.h.gch')
-    err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate()
-    assert '*** PCH/Modules Loaded:\nModule: header.h.gch' not in err[1].replace('\r\n', '\n'), err[1]
-
-    # with specified target via -o
-    try_delete('header.h.gch')
-    Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'my.gch']).communicate()
-    assert os.path.exists('my.gch')
+      Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h']).communicate()
+
+      output = run_js(self.in_dir('a.out.js'), stderr=PIPE, full_output=True, engine=NODE_JS)
+      assert '|5|' in output, output
+
+      # also verify that the gch is actually used
+      err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate()
+      self.assertTextDataContained('*** PCH/Modules Loaded:\nModule: header.h.' + suffix, err[1])
+      # and sanity check it is not mentioned when not
+      try_delete('header.h.' + suffix)
+      err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate()
+      assert '*** PCH/Modules Loaded:\nModule: header.h.' + suffix not in err[1].replace('\r\n', '\n'), err[1]
+
+      # with specified target via -o
+      try_delete('header.h.' + suffix)
+      Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'my.' + suffix]).communicate()
+      assert os.path.exists('my.' + suffix)
+
+      # -include-pch flag
+      Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'header.h.' + suffix]).communicate()
+      check_execute([PYTHON, EMCC, 'src.cpp', '-include-pch', 'header.h.' + suffix])
+      output = run_js('a.out.js')
+      assert '|5|' in output, output
 
   def test_warn_unaligned(self):
     open('src.cpp', 'w').write(r'''
@@ -4202,7 +4233,7 @@ def test_no_filesystem(self):
     assert FS_MARKER not in open('a.out.js').read()
     print 'yes fs, no fs:', yes_size, no_size
     assert yes_size - no_size > 100000 # 100K of FS code is removed
-    assert no_size < 315000
+    assert no_size < 360000
 
   def test_no_nuthin(self):
     print 'part one: check NO_FILESYSTEM is automatically set, and effective'
@@ -4229,7 +4260,7 @@ def do(name, source, moar_opts):
         assert sizes['no_nuthin'] < 0.975*sizes['no_fs']
       assert sizes['no_fs_manual'] < sizes['no_fs'] # manual can remove a tiny bit more
       assert sizes['no_fs'] < 1.02*sizes['no_fs_manual']
-    test([], 0.75, 320000)
+    test([], 0.75, 360000)
     test(['-O1'], 0.66, 210000)
     test(['-O2'], 0.50, 70000)
     test(['-O3', '--closure', '1'], 0.60, 50000)
@@ -4254,8 +4285,8 @@ def do(name, moar_opts):
       assert sizes['no_nuthin'] < absolute
       if '--closure' in opts: # no EXPORTED_RUNTIME_METHODS makes closure much more effective
         assert sizes['no_nuthin'] < 0.975*sizes['normal']
-    test([], 1, 200000)
-    test(['-O1'], 1, 200000)
+    test([], 1, 220000)
+    test(['-O1'], 1, 215000)
     test(['-O2'], 0.99, 75000)
     test(['-O3', '--closure', '1'], 0.975, 50000)
     test(['-O3', '--closure', '2'], 0.975, 41000) # might change now and then
@@ -5320,33 +5351,6 @@ def test_emcc_dev_null(self):
     assert proc.returncode == 0
     self.assertContained('#define __EMSCRIPTEN__ 1', out) # all our defines should show up
 
-  def test_emcc_wasm_0(self):
-    default_error_message = 'cannot use WASM=1 when full asm.js validation was disabled'
-    for args, ok, error_message in [
-      ([], False, ''),
-      (['-O1'], True, ''),
-      (['-O2'], True, ''),
-      (['-O3'], True, ''),
-      (['-O2', '-g'], True, ''),
-      (['-s', 'ASM_JS=1'], True, ''),
-      (['-s', 'WASM=0'], True, ''),
-      (['-s', 'WASM=1'], False, ''),
-      (['-s', 'ALLOW_MEMORY_GROWTH=1'], False, 'memory growth is not supported with WASM=1'),
-      (['-s', 'ALLOW_MEMORY_GROWTH=1', '-O1'], False, 'memory growth is not supported with WASM=1'),
-      (['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_WHITELIST=["_main"]', '-O2', '-s', 'ASSERTIONS=1'], True, ''),
-    ]:
-      print 'emcc_wasm_0', args, ok
-      if not error_message: error_message = default_error_message
-      proc = Popen([PYTHON, EMCC, path_from_root('tests', 'hello_world.c'), '-s', 'WASM=1'] + args, stdout=PIPE, stderr=PIPE)
-      out, err = proc.communicate()
-      if ok:
-        assert proc.returncode == 0
-        self.assertNotContained(error_message, err)
-        self.assertContained('hello, world!', run_js('a.out.js'))
-      else:
-        assert proc.returncode != 0
-        self.assertContained(error_message, err)
-
   def test_umask_0(self):
     open('src.c', 'w').write(r'''
 #include <sys/stat.h>
@@ -6042,3 +6046,170 @@ def test_override_environment(self):
               assert ('environment is %s? false' % other) in seen, seen
           print '-- verified proper env is shown'
 
+  def test_warn_no_filesystem(self):
+    WARNING = 'Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. You can force-include filesystem support with  -s FORCE_FILESYSTEM=1'
+
+    check_execute([PYTHON, EMCC, path_from_root('tests', 'hello_world.c')])
+    seen = run_js('a.out.js', stderr=PIPE)
+    assert WARNING not in seen
+
+    def test(contents):
+      open('src.cpp', 'w').write(r'''
+  #include<stdio.h>
+  #include<emscripten.h>
+  int main() {
+    EM_ASM({ %s });
+    printf("hello, world!\n");
+    return 0;
+  }
+  ''' % contents)
+      check_execute([PYTHON, EMCC, 'src.cpp'])
+      self.assertContained(WARNING, run_js('a.out.js', stderr=PIPE, assert_returncode=None))
+
+    # might appear in handwritten code
+    test("FS.init()")
+    test("FS.createPreloadedFile('waka waka, just warning check')");
+    test("FS.createDataFile('waka waka, just warning check')");
+    test("FS.analyzePath('waka waka, just warning check')");
+    test("FS.loadFilesFromDB('waka waka, just warning check')");
+    # might appear in filesystem code from a separate script tag
+    test("Module['FS_createDataFile']('waka waka, just warning check')");
+    test("Module['FS_createPreloadedFile']('waka waka, just warning check')");
+
+    # text is in the source when needed, but when forcing FS, it isn't there
+    check_execute([PYTHON, EMCC, 'src.cpp'])
+    self.assertContained(WARNING, open('a.out.js').read())
+    check_execute([PYTHON, EMCC, 'src.cpp', '-s', 'FORCE_FILESYSTEM=1']) # forcing FS means no need
+    self.assertNotContained(WARNING, open('a.out.js').read())
+    check_execute([PYTHON, EMCC, 'src.cpp', '-s', 'ASSERTIONS=0']) # no assertions, no need
+    self.assertNotContained(WARNING, open('a.out.js').read())
+    check_execute([PYTHON, EMCC, 'src.cpp', '-O2']) # optimized, so no assertions
+    self.assertNotContained(WARNING, open('a.out.js').read())
+
+  ############################################################
+  # Function eliminator tests
+  ############################################################
+  def normalize_line_endings(self, input):
+    return input.replace('\r\n', '\n').replace('\n\n', '\n').replace('\n\n', '\n')
+
+  def get_file_contents(self, file):
+    # Return the full text of `file` after passing it through
+    # normalize_line_endings, so that expected and actual optimizer
+    # output can be compared regardless of the platform's line endings.
+    with open(file) as infile:
+      raw_text = infile.read()
+
+    return self.normalize_line_endings(raw_text)
+
+  def function_eliminator_test_helper(self, input_file, expected_output_file, use_hash_info=False):
+    # Runs tools/eliminate-duplicate-functions.js (via NODE_JS) on tests/optimizer/<input_file> and checks that its stdout matches tests/optimizer/<expected_output_file> once line endings are normalized.
+    input_file = path_from_root('tests', 'optimizer', input_file)
+    expected_output_file = path_from_root('tests', 'optimizer', expected_output_file)
+    command = [path_from_root('tools', 'eliminate-duplicate-functions.js'), input_file, '--no-minimize-whitespace', '--use-asm-ast']
+    # use_hash_info: additionally pass --use-hash-info to the script
+    if use_hash_info:
+      command.append('--use-hash-info')
+
+    output, err = Popen(NODE_JS + command, stdin=PIPE, stderr=PIPE, stdout=PIPE).communicate()
+    assert err == '', err  # any output on stderr counts as a failure
+    expected_output = self.get_file_contents(expected_output_file)
+    output = self.normalize_line_endings(output)
+    self.assertIdentical(expected_output, output)
+
+  def test_function_eliminator_simple(self):
+    self.function_eliminator_test_helper('test-function-eliminator-simple.js',
+                                         'test-function-eliminator-simple-output.js')
+
+  def test_function_eliminator_replace_function_call(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-function-call.js',
+                                        'test-function-eliminator-replace-function-call-output.js')
+
+  def test_function_eliminator_replace_function_call_two_passes(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output.js',
+                                         'test-function-eliminator-replace-function-call-two-passes-output.js')
+
+  def test_function_eliminator_replace_array_value(self):
+    # Runs the duplicate function eliminator on a copy of the input file and compares the resulting file with the expected output.
+    import tools.tempfiles  # used in the finally clause; imported explicitly instead of relying on another module having loaded it
+    import tools.duplicate_function_eliminator
+    output_file = 'output.js'
+    try:
+      tools.shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value.js'), output_file)
+
+      # Run duplicate function elimination
+      tools.duplicate_function_eliminator.run(output_file)
+      output_file_contents = self.get_file_contents(output_file)
+
+      expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value-output.js'))
+      self.assertIdentical(output_file_contents, expected_file_contents)
+    finally:
+      tools.tempfiles.try_delete(output_file)
+
+  def test_function_eliminator_replace_object_value_assignment(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment.js',
+                                         'test-function-eliminator-replace-object-value-assignment-output.js')
+
+  def test_function_eliminator_variable_clash(self):
+    self.function_eliminator_test_helper('test-function-eliminator-variable-clash.js',
+                                         'test-function-eliminator-variable-clash-output.js')
+
+  def test_function_eliminator_replace_variable_value(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value.js',
+                                         'test-function-eliminator-replace-variable-value-output.js')
+
+  def test_function_eliminator_double_parsed_correctly(self):
+    # This is a test that makes sure that when we perform final optimization on
+    # the JS file, doubles are preserved (and not converted to ints).
+    import tools.tempfiles
+    import tools.duplicate_function_eliminator
+    import tools.js_optimizer
+
+    output_file = 'output.js'
+
+    try:
+      tools.shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly.js'), output_file)
+
+      # Run duplicate function elimination
+      tools.duplicate_function_eliminator.run(output_file)
+
+      # Run last opts
+      tools.shared.safe_copy(tools.js_optimizer.run(output_file, ['last', 'asm']), output_file)
+      output_file_contents = self.get_file_contents(output_file)
+
+      # Compare
+      expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly-output.js'))
+      self.assertIdentical(output_file_contents, expected_file_contents)
+    finally:
+      tools.tempfiles.try_delete(output_file)
+
+  # Now do the same, but using a pre-generated equivalent function hash info that
+  # comes in handy for parallel processing
+  def test_function_eliminator_simple_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-simple-with-hash-info.js',
+                                         'test-function-eliminator-simple-output.js',
+                                         use_hash_info=True)
+
+  def test_function_eliminator_replace_function_call_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-with-hash-info.js',
+                                         'test-function-eliminator-replace-function-call-output.js',
+                                         use_hash_info=True)
+
+  def test_function_eliminator_replace_function_call_two_passes_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output-with-hash-info.js',
+                                         'test-function-eliminator-replace-function-call-two-passes-output.js',
+                                         use_hash_info=True)
+
+  def test_function_eliminator_replace_object_value_assignment_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment-with-hash-info.js',
+                                         'test-function-eliminator-replace-object-value-assignment-output.js',
+                                         use_hash_info=True)
+
+  def test_function_eliminator_variable_clash_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-variable-clash-with-hash-info.js',
+                                         'test-function-eliminator-variable-clash-output.js',
+                                         use_hash_info=True)
+
+  def test_function_eliminator_replace_variable_value_with_hash_info(self):
+    self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value-with-hash-info.js',
+                                         'test-function-eliminator-replace-variable-value-output.js',
+                                         use_hash_info=True)
diff --git a/tests/test_sse1.cpp b/tests/test_sse1.cpp
index f53f997776403..ec04509e06525 100644
--- a/tests/test_sse1.cpp
+++ b/tests/test_sse1.cpp
@@ -321,8 +321,8 @@ int main()
 	_MM_SET_ROUNDING_MODE(roundingMode);
 	unsigned int csr = _mm_getcsr();
 	_mm_setcsr(csr);
-	unsigned char dummyData[4096];
 #endif
+	unsigned char dummyData[4096];
 	_mm_prefetch(dummyData, _MM_HINT_T0);
 	_mm_prefetch(dummyData, _MM_HINT_T1);
 	_mm_prefetch(dummyData, _MM_HINT_T2);
diff --git a/tests/test_sse3_full.cpp b/tests/test_sse3_full.cpp
new file mode 100644
index 0000000000000..c0e7db2d65c37
--- /dev/null
+++ b/tests/test_sse3_full.cpp
@@ -0,0 +1,40 @@
+// This file uses SSE3 by calling different functions with different interesting inputs and prints the results.
+// Use a diff tool to compare the results between platforms.
+
+#include <pmmintrin.h>
+#define ENABLE_SSE2
+#include "test_sse_full.h"
+
+#ifndef _DEBUG
+// The following tests break when optimizer is applied, so disable them for now. Baby steps.
+// See https://github.com/kripken/emscripten/issues/3789
+#define BREAKS_UNDER_OPTIMIZATION
+#endif
+
+float *interesting_floats = get_interesting_floats();
+int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
+uint32_t *interesting_ints = get_interesting_ints();
+int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
+double *interesting_doubles = get_interesting_doubles();
+int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
+
+int main()
+{
+	assert(numInterestingFloats % 4 == 0);
+	assert(numInterestingInts % 4 == 0);
+	assert(numInterestingDoubles % 4 == 0);	
+
+	Ret_M128d_M128d(__m128d, _mm_addsub_pd);
+	Ret_M128_M128(__m128, _mm_addsub_ps);
+	Ret_M128d_M128d(__m128d, _mm_hadd_pd);
+	Ret_M128_M128(__m128, _mm_hadd_ps);
+	Ret_M128d_M128d(__m128d, _mm_hsub_pd);
+	Ret_M128_M128(__m128, _mm_hsub_ps);
+#ifndef BREAKS_UNDER_OPTIMIZATION
+	Ret_IntPtr(__m128i, _mm_lddqu_si128, __m128i*, 4, 1);
+#endif
+	Ret_DoublePtr(__m128d, _mm_loaddup_pd, 1, 1);
+	Ret_M128d(__m128d, _mm_movedup_pd);
+	Ret_M128(__m128, _mm_movehdup_ps);
+	Ret_M128(__m128, _mm_moveldup_ps);
+}
diff --git a/tests/test_sse4_1_full.cpp b/tests/test_sse4_1_full.cpp
new file mode 100644
index 0000000000000..74c85814a84c4
--- /dev/null
+++ b/tests/test_sse4_1_full.cpp
@@ -0,0 +1,104 @@
+// This file uses SSE4.1 by calling different functions with different interesting inputs and prints the results.
+// Use a diff tool to compare the results between platforms.
+
+#include <smmintrin.h>
+#define ENABLE_SSE2
+#include "test_sse_full.h"
+
+float *interesting_floats = get_interesting_floats();
+int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
+uint32_t *interesting_ints = get_interesting_ints();
+int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
+double *interesting_doubles = get_interesting_doubles();
+int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
+
+int main()
+{
+	assert(numInterestingFloats % 4 == 0);
+	assert(numInterestingInts % 4 == 0);
+	assert(numInterestingDoubles % 4 == 0);	
+
+	Ret_M128i_M128i_Tint(__m128i, _mm_blend_epi16);
+	Ret_M128d_M128d_Tint(__m128d, _mm_blend_pd);
+	Ret_M128_M128_Tint(__m128, _mm_blend_ps);
+	// _mm_blendv_epi8
+	// _mm_blendv_pd
+	// _mm_blendv_ps
+	// _mm_ceil_pd
+	// _mm_ceil_ps
+	// _mm_ceil_sd
+	// _mm_ceil_ss
+	// M128i_M128i_M128i(_mm_cmpeq_epi64);
+	// Ret_M128i(__m128i, _mm_cvtepi16_epi32);
+	// Ret_M128i(__m128i, _mm_cvtepi16_epi64);
+	// Ret_M128i(__m128i, _mm_cvtepi32_epi64);
+	// Ret_M128i(__m128i, _mm_cvtepi8_epi16);
+	// Ret_M128i(__m128i, _mm_cvtepi8_epi32);
+	// Ret_M128i(__m128i, _mm_cvtepi8_epi64);
+	// _mm_cvtepu16_epi32
+	// _mm_cvtepu16_epi64
+	// _mm_cvtepu32_epi64
+	// _mm_cvtepu8_epi16
+	// _mm_cvtepu8_epi32
+	// _mm_cvtepu8_epi64
+	// _mm_dp_pd
+	// _mm_dp_ps
+	Ret_M128i_Tint(int, _mm_extract_epi32);
+	Ret_M128i_Tint(int, _mm_extract_epi8);
+	// Ret_M128i_Tint(long long, _mm_extract_epi64);
+	Ret_M128i_Tint(int, _mm_extract_epi8);
+	Ret_M128_Tint(float, _mm_extract_ps);
+	// _mm_floor_pd
+	// _mm_floor_ps
+	// _mm_floor_sd
+	// _mm_floor_ss
+	// _mm_insert_epi32
+	// _mm_insert_epi64
+	// _mm_insert_epi8
+	// _mm_insert_ps
+	M128i_M128i_M128i(_mm_max_epi32);
+	M128i_M128i_M128i(_mm_max_epi8);
+	M128i_M128i_M128i(_mm_max_epu16);
+	M128i_M128i_M128i(_mm_max_epu32);
+	M128i_M128i_M128i(_mm_min_epi32);
+	M128i_M128i_M128i(_mm_min_epi8);
+	M128i_M128i_M128i(_mm_min_epu16);
+	M128i_M128i_M128i(_mm_min_epu32);
+	// _mm_minpos_epu16
+	// _mm_mpsadbw_epu8
+	// M128i_M128i_M128i(_mm_mul_epi32);
+	M128i_M128i_M128i(_mm_mullo_epi32);
+	// _mm_packus_epi32
+	// _mm_round_pd
+	// _mm_round_ps
+	// _mm_round_sd
+	// _mm_round_ss
+	// _mm_stream_load_si128
+	// _mm_test_all_ones
+	// _mm_test_all_zeros
+	// _mm_test_mix_ones_zeros
+	// _mm_testc_si128
+	// _mm_testnzc_si128
+	// _mm_testz_si128
+
+	// SSE 4.2:
+	// _mm_cmpestra
+	// _mm_cmpestrc
+	// _mm_cmpestri
+	// _mm_cmpestrm
+	// _mm_cmpestro
+	// _mm_cmpestrs
+	// _mm_cmpestrz
+	// M128i_M128i_M128i(_mm_cmpgt_epi64);
+	// _mm_cmpistra
+	// _mm_cmpistrc
+	// _mm_cmpistri
+	// _mm_cmpistrm
+	// _mm_cmpistro
+	// _mm_cmpistrs
+	// _mm_cmpistrz
+	// _mm_crc32_u16
+	// _mm_crc32_u32
+	// _mm_crc32_u64
+	// _mm_crc32_u8
+}
diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h
index 646b2b340ab37..b6d7bcb9b516c 100644
--- a/tests/test_sse_full.h
+++ b/tests/test_sse_full.h
@@ -276,6 +276,17 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime)
 				printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \
 			}
 
+#define Ret_M128_Tint_body(Ret_type, func, Tint) \
+	for(int i = 0; i < numInterestingFloats / 4; ++i) \
+		for(int k = 0; k < 4; ++k) \
+		{ \
+			__m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \
+			Ret_type ret = func(m1, Tint); \
+			char str[256]; tostr(&m1, str); \
+			char str2[256]; tostr(&ret, str2); \
+			printf("%s(%s, %d) = %s\n", #func, str, Tint, str2); \
+		}
+
 #define Ret_M128i_Tint_body(Ret_type, func, Tint) \
 	for(int i = 0; i < numInterestingInts / 4; ++i) \
 		for(int k = 0; k < 4; ++k) \
@@ -313,6 +324,20 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime)
 				printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \
 			}
 
+#define Ret_M128i_M128i_Tint_body(Ret_type, func, Tint) \
+	for(int i = 0; i < numInterestingInts / 4; ++i) \
+		for(int k = 0; k < 4; ++k) \
+			for(int j = 0; j < numInterestingInts / 4; ++j) \
+			{ \
+				__m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \
+				__m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \
+				Ret_type ret = func(m1, m2, Tint); \
+				char str[256]; tostr(&m1, str); \
+				char str2[256]; tostr(&m2, str2); \
+				char str3[256]; tostr(&ret, str3); \
+				printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \
+			}
+
 #define Ret_M128_M128_Tint_body(Ret_type, func, Tint) \
 	for(int i = 0; i < numInterestingFloats / 4; ++i) \
 		for(int k = 0; k < 4; ++k) \
@@ -354,8 +379,10 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime)
 	F(Ret_type, func, 255); \
 	F(Ret_type, func, 309);
 
+#define Ret_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_Tint_body, func)
 #define Ret_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func)
 #define Ret_M128i_int_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func)
+#define Ret_M128i_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_M128i_Tint_body, func)
 #define Ret_M128d_M128d_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func)
 #define Ret_M128_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_M128_Tint_body, func)
 
diff --git a/tests/test_ssse3_full.cpp b/tests/test_ssse3_full.cpp
new file mode 100644
index 0000000000000..6afc1d4bc47bb
--- /dev/null
+++ b/tests/test_ssse3_full.cpp
@@ -0,0 +1,37 @@
+// This file uses SSSE3 by calling different functions with different interesting inputs and prints the results.
+// Use a diff tool to compare the results between platforms.
+
+#include <tmmintrin.h>
+#define ENABLE_SSE2
+#include "test_sse_full.h"
+
+float *interesting_floats = get_interesting_floats();
+int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]);
+uint32_t *interesting_ints = get_interesting_ints();
+int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
+double *interesting_doubles = get_interesting_doubles();
+int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]);
+
+int main()
+{
+	assert(numInterestingFloats % 4 == 0);
+	assert(numInterestingInts % 4 == 0);
+	assert(numInterestingDoubles % 4 == 0);	
+
+	Ret_M128i(__m128i, _mm_abs_epi8);
+	Ret_M128i(__m128i, _mm_abs_epi16);
+	Ret_M128i(__m128i, _mm_abs_epi32);
+	Ret_M128i_M128i_Tint(__m128i, _mm_alignr_epi8);
+	M128i_M128i_M128i(_mm_hadd_epi16);
+	M128i_M128i_M128i(_mm_hadd_epi32);
+	M128i_M128i_M128i(_mm_hadds_epi16);
+	M128i_M128i_M128i(_mm_hsub_epi16);
+	M128i_M128i_M128i(_mm_hsub_epi32);
+	M128i_M128i_M128i(_mm_hsubs_epi16);
+	M128i_M128i_M128i(_mm_maddubs_epi16);
+	M128i_M128i_M128i(_mm_mulhrs_epi16);
+	M128i_M128i_M128i(_mm_shuffle_epi8);
+	M128i_M128i_M128i(_mm_sign_epi16);
+	M128i_M128i_M128i(_mm_sign_epi32);
+	M128i_M128i_M128i(_mm_sign_epi8);
+}
diff --git a/tools/client_mods.py b/tools/client_mods.py
index 399de5e0a85bb..8e6dea5d5dd0e 100644
--- a/tools/client_mods.py
+++ b/tools/client_mods.py
@@ -23,6 +23,10 @@ def get(settings, minified):
   var m = /var ([^=]+)=global\.Math\.fround;/.exec(code);
   var minified = m[1];
   if (!minified) throw 'fail';
+
+  // The minified JS variable for Math.fround might contain the '$' sign, so this must be escaped to \$ to be used as a search pattern.
+  minified = minified.replace(/\$/g, "\\\\$$");
+
   do {
     var moar = false; // we need to re-do, as x(x( will not be fixed
     code = code.replace(new RegExp('[^a-zA-Z0-9\\\\$\\\\_]' + minified + '\\\\(', 'g'), function(s) { moar = true; return s[0] + '(' });
@@ -71,59 +75,92 @@ def get(settings, minified):
   var atomics_or = /var\s+([^=]+?)\s*=\s*global\.Atomics\.or;/.exec(code)[1];
   var atomics_xor = /var\s+([^=]+?)\s*=\s*global\.Atomics\.xor;/.exec(code)[1];
 
+  // JS variables may contain the '$' sign, so these must be escaped. However,
+  // the '$' sign needs to be escaped differently depending on whether it's on the
+  // string to search for side (escape by '\\'), or the value to replace
+  // with side (escape by '$').
+  function escapeDollarForRegexSearch(str) { return str.replace(/\$/g, "\\\\$$"); }
+  function escapeDollarForRegexValue(str) { return str.replace(/\$/g, "$$$$"); }
+
+  var wb = '([^\\\\w\\\\$])'; // word break (one character, which is backinserted)
+
+  var s_heap8 = escapeDollarForRegexSearch(heap8);
+  var s_heap16 = escapeDollarForRegexSearch(heap16);
+  var s_heap32 = escapeDollarForRegexSearch(heap32);
+  var s_heapf32 = escapeDollarForRegexSearch(heapf32);
+  var s_heapf64 = escapeDollarForRegexSearch(heapf64);
+
   // The Atomics built-ins take as first parameter the heap object, however when replacing those with
   // polyfill versions, it is not possible to pass a heap object as the first parameter. Therefore
   // route each call to Atomics to a polyfill function for each type, e.g. "Atomics_add(HEAP32, index, val)" -> "Atomics_add_32(index, val)"
-  code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap8+',', 'g'), atomics_load + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap16+',', 'g'), atomics_load + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap32+',', 'g'), atomics_load + "_32(");
-  code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heapf32+',', 'g'), atomics_load + "_f32(");
-  code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heapf64+',', 'g'), atomics_load + "_f64(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap8+',', 'g'), atomics_store + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap16+',', 'g'), atomics_store + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap32+',', 'g'), atomics_store + "_32(");
-  code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heapf32+',', 'g'), atomics_store + "_f32(");
-  code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heapf64+',', 'g'), atomics_store + "_f64(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap8+',', 'g'), atomics_add + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap16+',', 'g'), atomics_add + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap32+',', 'g'), atomics_add + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap8+',', 'g'), atomics_sub + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap16+',', 'g'), atomics_sub + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap32+',', 'g'), atomics_sub + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap8+',', 'g'), atomics_and + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap16+',', 'g'), atomics_and + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap32+',', 'g'), atomics_and + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap8+',', 'g'), atomics_or + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap16+',', 'g'), atomics_or + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap32+',', 'g'), atomics_or + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap8+',', 'g'), atomics_xor + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap16+',', 'g'), atomics_xor + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap32+',', 'g'), atomics_xor + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap8+',', 'g'), atomics_exchange + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap16+',', 'g'), atomics_exchange + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap32+',', 'g'), atomics_exchange + "_32(");
-
-  code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap8+',', 'g'), atomics_compareExchange + "_8(");
-  code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap16+',', 'g'), atomics_compareExchange + "_16(");
-  code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap32+',', 'g'), atomics_compareExchange + "_32(");
+  var s_atomics_load = escapeDollarForRegexSearch(atomics_load);
+  var v_atomics_load = escapeDollarForRegexValue(atomics_load);
+  code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_load + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_load + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_load + "_32(");
+  code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heapf32+',', 'g'), '$1' + v_atomics_load + "_f32(");
+  code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heapf64+',', 'g'), '$1' + v_atomics_load + "_f64(");
+
+  var s_atomics_store = escapeDollarForRegexSearch(atomics_store);
+  var v_atomics_store = escapeDollarForRegexValue(atomics_store);
+  code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_store + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_store + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_store + "_32(");
+  code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heapf32+',', 'g'), '$1' + v_atomics_store + "_f32(");
+  code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heapf64+',', 'g'), '$1' + v_atomics_store + "_f64(");
+
+  var s_atomics_add = escapeDollarForRegexSearch(atomics_add);
+  var v_atomics_add = escapeDollarForRegexValue(atomics_add);
+  code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_add + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_add + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_add + "_32(");
+
+  var s_atomics_sub = escapeDollarForRegexSearch(atomics_sub);
+  var v_atomics_sub = escapeDollarForRegexValue(atomics_sub);
+  code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_sub + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_sub + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_sub + "_32(");
+
+  var s_atomics_and = escapeDollarForRegexSearch(atomics_and);
+  var v_atomics_and = escapeDollarForRegexValue(atomics_and);
+  code = code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_and + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_and + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_and + "_32(");
+
+  var s_atomics_or = escapeDollarForRegexSearch(atomics_or);
+  var v_atomics_or = escapeDollarForRegexValue(atomics_or);
+  code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_or + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_or + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_or + "_32(");
+
+  var s_atomics_xor = escapeDollarForRegexSearch(atomics_xor);
+  var v_atomics_xor = escapeDollarForRegexValue(atomics_xor);
+  code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_xor + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_xor + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_xor + "_32(");
+
+  var s_atomics_exchange = escapeDollarForRegexSearch(atomics_exchange);
+  var v_atomics_exchange = escapeDollarForRegexValue(atomics_exchange);
+  code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_exchange + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_exchange + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_exchange + "_32(");
+
+  var s_atomics_compareExchange = escapeDollarForRegexSearch(atomics_compareExchange);
+  var v_atomics_compareExchange = escapeDollarForRegexValue(atomics_compareExchange);
+  code = code.replace(new RegExp(wb + s_atomics_compareExchange + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_compareExchange + "_8(");
+  code = code.replace(new RegExp(wb + s_atomics_compareExchange + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_compareExchange + "_16(");
+  code = code.replace(new RegExp(wb + s_atomics_compareExchange + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_compareExchange + "_32(");
 
   // Remove the import statements of Atomics built-ins.
-  code = code.replace(new RegExp("var " + atomics_load + "\\\\s*=\\\\s*global\\.Atomics\\.load;"), "");
-  code = code.replace(new RegExp("var " + atomics_store + "\\\\s*=\\\\s*global\\.Atomics\\.store;"), "");
-  code = code.replace(new RegExp("var " + atomics_exchange + "\\\\s*=\\\\s*global\\.Atomics\\.exchange;"), "");
-  code = code.replace(new RegExp("var " + atomics_compareExchange + "\\\\s*=\\\\s*global\\.Atomics\\.compareExchange;"), "");
-  code = code.replace(new RegExp("var " + atomics_add + "\\\\s*=\\\\s*global\\.Atomics\\.add;"), "");
-  code = code.replace(new RegExp("var " + atomics_sub + "\\\\s*=\\\\s*global\\.Atomics\\.sub;"), "");
-  code = code.replace(new RegExp("var " + atomics_and + "\\\\s*=\\\\s*global\\.Atomics\\.and;"), "");
-  code = code.replace(new RegExp("var " + atomics_or + "\\\\s*=\\\\s*global\\.Atomics\\.or;"), "");
-  code = code.replace(new RegExp("var " + atomics_xor + "\\\\s*=\\\\s*global\\.Atomics\\.xor;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_load + "\\\\s*=\\\\s*global\\.Atomics\\.load;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_store + "\\\\s*=\\\\s*global\\.Atomics\\.store;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_exchange + "\\\\s*=\\\\s*global\\.Atomics\\.exchange;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_compareExchange + "\\\\s*=\\\\s*global\\.Atomics\\.compareExchange;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_add + "\\\\s*=\\\\s*global\\.Atomics\\.add;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_sub + "\\\\s*=\\\\s*global\\.Atomics\\.sub;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_and + "\\\\s*=\\\\s*global\\.Atomics\\.and;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_or + "\\\\s*=\\\\s*global\\.Atomics\\.or;"), "");
+  code = code.replace(new RegExp("var " + s_atomics_xor + "\\\\s*=\\\\s*global\\.Atomics\\.xor;"), "");
 
   // Implement polyfill versions of Atomics intrinsics inside the asm.js scope.
   code = code.replace("// EMSCRIPTEN_START_FUNCS", "// EMSCRIPTEN_START_FUNCS\\n"
diff --git a/tools/duplicate_function_eliminator.py b/tools/duplicate_function_eliminator.py
new file mode 100644
index 0000000000000..1a673136e2cad
--- /dev/null
+++ b/tools/duplicate_function_eliminator.py
@@ -0,0 +1,378 @@
+
+import os, sys, subprocess, multiprocessing, re, string, json, shutil, logging, traceback
+import shared
+from js_optimizer import *
+
+DUPLICATE_FUNCTION_ELIMINATOR = path_from_root('tools', 'eliminate-duplicate-functions.js')
+
+def process_shell(js, js_engine, shell, equivalentfn_hash_info=None):
+  """Run the eliminator in --use-hash-info mode on `shell` and return what it wrote to stdout.
+
+  `shell` and, when given, the `equivalentfn_hash_info` trailer are written to a temp file that is
+  handed to `js_engine` (a command list). `js` is accepted but not used."""
+  temp_file = temp_files.get('.eliminatedupes.js').name
+  with open(temp_file, 'w') as f: # closed even when a write raises
+    f.write(shell)
+    f.write('\n')
+    # The declared default is None, which write() rejects with a TypeError, so skip the trailer then.
+    if equivalentfn_hash_info is not None: f.write(equivalentfn_hash_info)
+  (output,error) = subprocess.Popen(js_engine +
+      [DUPLICATE_FUNCTION_ELIMINATOR, temp_file, '--use-hash-info', '--no-minimize-whitespace'],
+      stdout=subprocess.PIPE,stderr=subprocess.PIPE).communicate()
+  # Report the engine's stderr instead of failing with a bare AssertionError.
+  assert len(output) > 0, 'Error in duplicate function eliminator (no output): ' + error
+  assert len(error) == 0, 'Error in duplicate function eliminator: ' + error
+  return output
+
+def run_on_chunk(command):
+  """Run one eliminator `command` and return the temp file its stdout was saved to.
+
+  `command` must contain DUPLICATE_FUNCTION_ELIMINATOR followed directly by the chunk's filename."""
+  try:
+    filename = command[command.index(DUPLICATE_FUNCTION_ELIMINATOR) + 1]
+    # The output file gets a .json suffix for --gen-hash-info runs and .js otherwise.
+    file_suffix = '.json' if '--gen-hash-info' in command else '.js'
+
+    if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0':
+      saved = 'save_' + os.path.basename(filename)
+      # Keep prefixing until the name is unused in the directory the copy is written to.
+      while os.path.exists(os.path.join(shared.get_emscripten_temp_dir(), saved)): saved = 'save_' + saved
+      print >> sys.stderr, 'running DFE command', ' '.join(map(lambda c: c if c != filename else saved, command))
+      shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved))
+
+    if shared.EM_BUILD_VERBOSE_LEVEL >= 3: print >> sys.stderr, 'run_on_chunk: ' + str(command)
+
+    proc = subprocess.Popen(command, stdout=subprocess.PIPE)
+    output = proc.communicate()[0]
+    assert proc.returncode == 0, 'Error in optimizer (return code ' + str(proc.returncode) + '): ' + output
+    assert len(output) > 0 and not output.startswith('Assertion failed'), 'Error in optimizer: ' + output
+    filename = temp_files.get(os.path.basename(filename) + '.jo' + file_suffix).name
+
+    # Important to write out in binary mode, because the data we are writing contains Windows line endings '\r\n' because it was PIPED from console.
+    # Otherwise writing \r\n to ascii mode file will result in Windows amplifying \n to \r\n, generating bad \r\r\n line endings.
+    with open(filename, 'wb') as f:
+      f.write(output)
+    if DEBUG and not shared.WINDOWS: print >> sys.stderr, '.' # Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console.
+    return filename
+  except KeyboardInterrupt:
+    # avoid throwing keyboard interrupts from a child process
+    raise Exception()
+  except (TypeError, ValueError) as e:
+    formatted_lines = traceback.format_exc().splitlines()
+
+    print >> sys.stderr, ">>>>>>>>>>>>>>>>>"
+    for formatted_line in formatted_lines:
+        print >> sys.stderr, formatted_line
+    print >> sys.stderr, "<<<<<<<<<<<<<<<<<"
+
+    raise
+
+def dump_equivalent_functions(passed_in_filename, global_data):
+  """Write the groups of equivalent function names to `<passed_in_filename>.equivalent_functions.json`, merging into that file if it exists."""
+  equivalent_fn_info = {}  # function hash -> list of the function names recorded under that hash
+  equivalent_fn_json_file = passed_in_filename + ".equivalent_functions.json"
+
+  # If the file already exists (e.g. when running more than one pass), load it
+  # so that the data from this pass is merged into what is already there.
+  if os.path.isfile(equivalent_fn_json_file):
+    print >> sys.stderr, "Merging data from current pass for {} into {}".format(passed_in_filename, equivalent_fn_json_file)
+    with open(equivalent_fn_json_file) as data_file:
+      equivalent_fn_info = json.load(data_file)
+  else:
+    print >> sys.stderr, "Writing equivalent functions for {} to {}".format(passed_in_filename, equivalent_fn_json_file)
+
+  # Merge the global data's fn_hash_to_fn_name structure into
+  # the equivalent function info hash.
+  for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].iteritems():
+    if fn_hash not in equivalent_fn_info:
+      # A hash with a single name has no duplicates, so it is not recorded.
+      if len(fn_names) > 1:
+        equivalent_fn_info[fn_hash] = fn_names[:]  # copy, so global_data's list is not aliased
+    else:
+      for fn_name in fn_names:
+        if fn_name not in equivalent_fn_info[fn_hash]:
+          equivalent_fn_info[fn_hash].append(fn_name)
+
+  with open(equivalent_fn_json_file, 'w') as fout:
+    fout.write(json.dumps(equivalent_fn_info))
+
+def write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename):
+  """Merge the hash info in `json_files` (one JSON file per processed chunk) and write to `f`, as a
+  '// '-prefixed JSON comment, a map from each duplicate function name to its shortest equivalent name."""
+  global_data = {}  # aggregated info from every file in json_files
+  global_data['fn_hash_to_fn_name'] = {}
+  global_data['fn_hash_to_fn_body'] = {}
+  global_data['variable_names'] = {}
+
+  for json_file in json_files:
+    with open(json_file) as data_file:
+      data = json.load(data_file)
+
+      # Merge the data's fn_hash_to_fn_name structure into
+      # the global data hash.
+      for fn_hash, fn_names in data['fn_hash_to_fn_name'].iteritems():
+        if fn_hash not in global_data['fn_hash_to_fn_name']:
+            global_data['fn_hash_to_fn_name'][fn_hash] = fn_names[:]
+            global_data['fn_hash_to_fn_body'][fn_hash] = data['fn_hash_to_fn_body'][fn_hash]
+        else:
+          assert(data['fn_hash_to_fn_body'][fn_hash] == global_data['fn_hash_to_fn_body'][fn_hash])  # same hash must mean same body
+
+          for fn_name in fn_names:
+            if fn_name not in global_data['fn_hash_to_fn_name'][fn_hash]:
+              global_data['fn_hash_to_fn_name'][fn_hash].append(fn_name)
+
+      # Merge the data's variable_names structure into
+      # the global data hash. The first value seen for a variable wins.
+      for variable, value in data['variable_names'].iteritems():
+        if variable not in global_data['variable_names']:
+            global_data['variable_names'][variable] = value
+
+  variable_names = global_data['variable_names']
+
+  # Build the map from each duplicate name to the shortest name sharing its hash.
+  equivalent_fn_hash = {}
+  for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].iteritems():
+    shortest_fn = None  # stays None when every name in the group is also in variable_names
+    for fn_name in fn_names:
+      if (fn_name not in variable_names) and (shortest_fn is None or (len(fn_name) < len(shortest_fn))):
+        shortest_fn = fn_name
+
+    if shortest_fn is not None:
+      for fn_name in fn_names:
+        if fn_name not in variable_names and fn_name != shortest_fn:
+          equivalent_fn_hash[fn_name] = shortest_fn
+
+  # Dump the sets of equivalent functions if the user desires it.
+  # This comes in handy for debugging.
+  if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS:
+    dump_equivalent_functions(passed_in_filename, global_data)
+
+  # Now write the equivalent function hash as a compact JSON comment (no trailing newline is written).
+  f.write('// ' + json.dumps(equivalent_fn_hash, separators=(',',':')))
+
+# gen_hash_info is used to determine whether we are generating
+# the global set of function implementation hashes. If set to
+# False, we assume that we have to use the global hash info to
+# reduce the set of duplicate functions
+def run_on_js(filename, gen_hash_info=False):
+  js_engine=shared.NODE_JS
+
+  js = open(filename).read()
+  if os.linesep != '\n':
+    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code
+
+  equivalentfn_hash_info = None
+  passed_in_filename = filename
+
+  # Find markers
+  start_funcs = js.find(start_funcs_marker)
+  end_funcs = js.rfind(end_funcs_marker)
+
+  if start_funcs < 0 or end_funcs < start_funcs:
+    logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))
+    sys.exit(1)
+
+  if not gen_hash_info:
+    equivalentfn_hash_info = js[js.rfind('//'):]
+
+    start_asm = js.find(start_asm_marker)
+    end_asm = js.rfind(end_asm_marker)
+    assert (start_asm >= 0) == (end_asm >= 0)
+
+    # We need to split out the asm shell as well, for minification
+    pre = js[:start_asm + len(start_asm_marker)]
+    post = js[end_asm:]
+    asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + '''
+EMSCRIPTEN_FUNCS();
+''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
+    js = js[start_funcs + len(start_funcs_marker):end_funcs]
+
+    # we assume there is a maximum of one new name per line
+    asm_shell_pre, asm_shell_post = process_shell(js, js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();');
+    asm_shell_post = asm_shell_post.replace('});', '})');
+    pre += asm_shell_pre + '\n' + start_funcs_marker
+    post = end_funcs_marker + asm_shell_post + post
+
+    if not gen_hash_info:
+      # We don't need the extra info at the end
+      post = post[:post.rfind('//')].strip()
+  else:
+    pre = js[:start_funcs + len(start_funcs_marker)]
+    post = js[end_funcs + len(end_funcs_marker):]
+    js = js[start_funcs + len(start_funcs_marker):end_funcs]
+    post = end_funcs_marker + post
+
+  total_size = len(js)
+  funcs = split_funcs(js, False)
+
+  js = None
+
+  # if we are making source maps, we want our debug numbering to start from the
+  # top of the file, so avoid breaking the JS into chunks
+  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
+
+  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
+  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
+  chunks = shared.chunkify(funcs, chunk_size)
+
+  chunks = filter(lambda chunk: len(chunk) > 0, chunks)
+  if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
+  funcs = None
+
+  if len(chunks) > 0:
+    def write_chunk(chunk, i):
+      temp_file = temp_files.get('.jsfunc_%d.js' % i).name
+      f = open(temp_file, 'w')
+      f.write(chunk)
+
+      if not gen_hash_info:
+        f.write('\n')
+        f.write(equivalentfn_hash_info)
+      f.close()
+      return temp_file
+    filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))]
+  else:
+    filenames = []
+
+  old_filenames = filenames[:]
+  if len(filenames) > 0:
+    commands = map(lambda filename: js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, filename, '--gen-hash-info' if gen_hash_info else '--use-hash-info', '--no-minimize-whitespace'], filenames)
+
+    if DEBUG and commands is not None:
+      print >> sys.stderr, [' '.join(command if command is not None else '(null)') for command in commands]
+
+    cores = min(cores, len(filenames))
+    if len(chunks) > 1 and cores >= 2:
+      # We can parallelize
+      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores  (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
+      pool = multiprocessing.Pool(processes=cores)
+      filenames = pool.map(run_on_chunk, commands, chunksize=1)
+    else:
+      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
+      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
+      filenames = [run_on_chunk(command) for command in commands]
+  else:
+    filenames = []
+
+  json_files = []
+
+  # We're going to be coalescing the files back at the end
+  # Just replace the file list with the ones provided in
+  # the command list - and save off the generated Json
+  if gen_hash_info:
+    json_files = filenames[:]
+    filenames = old_filenames[:]
+
+  for filename in filenames: temp_files.note(filename)
+
+  filename += '.jo.js'
+  f = open(filename, 'w')
+  f.write(pre);
+  pre = None
+
+  # sort functions by size, to make diffing easier and to improve aot times
+  funcses = []
+  for out_file in filenames:
+    funcses.append(split_funcs(open(out_file).read(), False))
+  funcs = [item for sublist in funcses for item in sublist]
+  funcses = None
+  def sorter(x, y):
+    diff = len(y[1]) - len(x[1])
+    if diff != 0: return diff
+    if x[0] < y[0]: return 1
+    elif x[0] > y[0]: return -1
+    return 0
+  if not os.environ.get('EMCC_NO_OPT_SORT'):
+    funcs.sort(sorter)
+
+  for func in funcs:
+    f.write(func[1])
+  funcs = None
+
+  f.write('\n')
+  f.write(post);
+  # No need to write suffix: if there was one, it is inside post which exists when suffix is there
+  f.write('\n')
+
+  if gen_hash_info and len(json_files) > 0:
+    write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)
+  f.close()
+
+  return filename
+
+def save_temp_file(file_to_process):  # copy file_to_process under EMSCRIPTEN_TEMP_FILES_DIR when saving is enabled
+  if os.environ.get('EMSCRIPTEN_SAVE_TEMP_FILES') and os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'):  # opt-in: both variables must be set
+    destinationFile = file_to_process
+    # Destination = the same path with the TEMP_DIR value replaced by EMSCRIPTEN_TEMP_FILES_DIR.
+    temp_dir_name = os.environ.get('TEMP_DIR')  # NOTE(review): None when TEMP_DIR is unset, which would make replace() raise - confirm it is always exported
+    destinationFile = destinationFile.replace(temp_dir_name, os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'))
+    # Create the destination directory if it does not exist yet.
+    if not os.path.exists(os.path.dirname(destinationFile)):
+      os.makedirs(os.path.dirname(destinationFile))
+    # Report the copy on stderr, then perform it.
+    print >> sys.stderr, "Copying {} to {}".format(file_to_process, destinationFile)
+    shutil.copyfile(file_to_process, destinationFile)
+
+def get_func_names(javascript_file):
+  """Return the names of the functions defined between the EMSCRIPTEN_*_FUNCS markers.
+
+  javascript_file is the path of the JS file to scan. A missing start (or end) marker
+  falls back to the start (or end) of the file instead of a bogus find() == -1 offset.
+  """
+  start_tok = "// EMSCRIPTEN_START_FUNCS"
+  end_tok = "// EMSCRIPTEN_END_FUNCS"
+  with open(javascript_file, 'rt') as fin:
+    blob = fin.read()
+  start_idx = blob.find(start_tok)
+  start_off = 0 if start_idx < 0 else start_idx + len(start_tok)
+  end_off = blob.find(end_tok, start_off)
+  if end_off < 0:
+    end_off = len(blob)
+  # One capture group, so findall() yields exactly the function names, in file order.
+  return re.findall(r'function (\S+?)\s*\(', blob[start_off:end_off])
+
+def eliminate_duplicate_funcs(file_name):
+  """Run every configured duplicate-function elimination pass over file_name, in place.
+
+  If ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS is non-zero, the names of the
+  functions that disappeared are written to '<file_name>.eliminated_functions.json'.
+  """
+  dump_eliminated = shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0
+  if dump_eliminated:
+    # Remove previous log file if it exists
+    stale_json = file_name + ".equivalent_functions.json"
+    if os.path.isfile(stale_json):
+      print >> sys.stderr, "Deleting old json: " + stale_json
+      os.remove(stale_json)
+    # Snapshot the function names before any pass runs
+    funcs_before = get_func_names(file_name)
+
+  for pass_num in range(shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_PASSES):
+    if DEBUG: print >> sys.stderr, "[PASS {}]: eliminating duplicate functions in: {}.".format(pass_num, file_name)
+
+    # Step 1: generate the JSON hash info describing the equivalent functions
+    hashed_file = run_on_js(filename=file_name, gen_hash_info=True)
+    save_temp_file(hashed_file)
+
+    # Step 2: use that hash info to reduce the JS file, then move the result back in place
+    reduced_file = run_on_js(filename=hashed_file, gen_hash_info=False)
+    save_temp_file(reduced_file)
+    shared.safe_move(reduced_file, file_name)
+
+  if not dump_eliminated:
+    return
+
+  removed_names = sorted(set(funcs_before) - set(get_func_names(file_name)))
+  eliminated_funcs_file = file_name + ".eliminated_functions.json"
+  print >> sys.stderr, "Writing eliminated functions to file: {}".format(eliminated_funcs_file)
+  with open(eliminated_funcs_file, 'w') as fout:
+    fout.writelines('{}\n'.format(name) for name in removed_names)
+
+def run(filename, js_engine=shared.NODE_JS):
+  js_engine = shared.listify(js_engine)
+  # NOTE(review): the listified js_engine is not used below - confirm whether it should be forwarded.
+  return temp_files.run_and_clean(lambda: eliminate_duplicate_funcs(filename))
+
+if __name__ == '__main__':
+  out = run(sys.argv[1], sys.argv[2:])  # argv[1] is passed as filename, the remaining arguments as js_engine
+
diff --git a/tools/eliminate-duplicate-functions.js b/tools/eliminate-duplicate-functions.js
new file mode 100644
index 0000000000000..ce015d9a4289b
--- /dev/null
+++ b/tools/eliminate-duplicate-functions.js
@@ -0,0 +1,541 @@
+///////////////////////////////////////////////////////////////////////////////////////////////
+// Eliminate-Duplicate-Functions.js
+//
+// This is a Javascript file that is used to post-process an Emscripten transpiled JS file.
+// It will remove all the duplicate functions from the generated ASM. In its current form,
+// the input JS file is expected to be a 'chunk' from an Emscripten generated ASM.JS file.
+//
+// An ASM JS chunk consists of a number of ASM.JS function definitions. It can also represent
+// the ASM JS 'shell' which consists of the global variable declarations for the generated ASM JS.
+//
+// The file will remove all the generated functions that are deemed to be identical. Currently,
+// the file will only run one pass of the algorithm. The caller of this JS file can run multiple
+// passes to ensure that higher level functions which will become identical after a pass can
+// be further eliminated.
+//
+// Usually, 4 or at most 5 passes will result in an optimal reduction - i.e., in a file that
+// cannot be reduced any further.
+///////////////////////////////////////////////////////////////////////////////////////////////
+var crypto = require('crypto');
+var uglify = require('../tools/eliminator/node_modules/uglify-js');
+
+var nodeFS = require('fs');
+var nodePath = require('path');
+var debug = false;
+var debugFile = undefined;
+var debugFileName = 'function_eliminator.log';
+var genHashInfo = false;
+var useHashInfo = false;
+var useAsmAst = false;
+
+// Variables that help control the verbosity of debug spew
+// Set appropriate zones here (to 0 or 1) for debugging various
+// parts of the algorithm.
+var ZONE_IDENTIFY_DUPLICATE_FUNCS = 1;
+var ZONE_REPLACE_FUNCTION_REFERENCES = 1;
+var ZONE_REPLACE_DUPLICATE_FUNCS = 1;
+var ZONE_EQUIVALENT_FUNCTION_HASH = 1;
+var ZONE_TOP_LEVEL = 1;
+var ZONE_DUMP_AST = 0;
+
+if (!nodeFS.existsSync) { // fallback for fs modules that do not provide existsSync
+  nodeFS.existsSync = function(path) {
+    try {
+      return !!nodeFS.readFileSync(path); // a successful read means the path exists
+    } catch (e) {
+      return false; // any read failure is reported as "does not exist"
+    }
+  }
+}
+
+function srcToAst(src) { // parse JS source text with the uglify-js parser and return its AST
+  return uglify.parser.parse(src, false, false);
+}
+
+function astToSrc(ast, minifyWhitespace) { // regenerate JS source text from an AST
+  return uglify.uglify.gen_code(ast, {
+    debug: debug,
+    ascii_only: true,
+    beautify: !minifyWhitespace, // beautified output unless whitespace minification was requested
+    indent_level: 1
+  });
+}
+
+// Traverses the children of a node. If the traverse function returns an object,
+// replaces the child. If it returns true, stop the traversal and return true.
+function traverseChildren(node, traverse, pre, post) {
+  for (var i = 0; i < node.length; i++) {
+    var subnode = node[i];
+    if (Array.isArray(subnode)) { // only array elements are visited; everything else is skipped
+      var subresult = traverse(subnode, pre, post);
+      if (subresult === true) return true; // propagate a stop request to the caller
+      if (subresult !== null && typeof subresult === 'object') node[i] = subresult; // put the replacement in the child's slot
+    }
+  }
+}
+
+print = function(x) { // write x plus a newline to stdout; NOTE(review): assigned without var, so this is an implicit global
+  process['stdout'].write(x + '\n');
+};
+
+printErr = function(x) { // write x plus a newline to stderr; NOTE(review): assigned without var, so this is an implicit global
+  process['stderr'].write(x + '\n');
+};
+
+function debugLog(zone, str) { // append str to the debug log when debugging is on and the zone is non-zero
+  if (debug && (zone !== 0)) {
+    nodeFS.writeSync(debugFile, str + '\n'); // debugFile is the descriptor opened when --debug is parsed
+  }
+}
+
+// Traverses a JavaScript syntax tree rooted at the given node calling the given
+// callback for each node.
+//   @arg node: The root of the AST.
+//   @arg pre: The pre to call for each node. This will be called with
+//     the node as the first argument and its type as the second. If true is
+//     returned, the traversal is stopped. If an object is returned,
+//     it replaces the passed node in the tree. If null is returned, we stop
+//     traversing the subelements (but continue otherwise).
+//   @arg post: A callback to call after traversing all children.
+//   @returns: If the root node was replaced, the new root node. If the traversal
+//     was stopped, true. Otherwise undefined.
+function traverse(node, pre, post) {
+  var kind = node[0];
+  var isTyped = typeof kind === 'string';
+  var outcome; // stays undefined when pre is not invoked
+  if (isTyped) {
+    outcome = pre(node, kind);
+    if (outcome === true) return true;
+    // Any other truthy result replaces the node; keep processing the replacement
+    if (outcome) node = outcome;
+  }
+  // A null result from pre means "do not descend into the children"
+  if (outcome !== null && traverseChildren(node, traverse, pre, post) === true) {
+    return true;
+  }
+  if (isTyped && post) {
+    var afterResult = post(node, kind);
+    outcome = outcome || afterResult;
+  }
+  return outcome;
+}
+
+function dumpAst(ast) { // log the AST as indented JSON under the ZONE_DUMP_AST debug zone
+  debugLog(ZONE_DUMP_AST, JSON.stringify(ast, null, '  '));
+}
+
+function getFunctionBody(node) {
+  // Remove the function <name> part of the source for the function
+  var functionSrc = astToSrc(node, true); // whitespace-minified source of the node
+  var functionNameRegex = /(function .*?)\(/; // lazily matches "function <name>" up to the first "("
+  return functionSrc.replace(functionNameRegex, "("); // no g flag: only the first match is replaced
+}
+
+function traverseFunctions(ast, callback) { // call callback(node) for every 'defun' node in the top-level list
+  var topLevelList = useAsmAst ? ast : ast[1]; // with --use-asm-ast, ast itself is used as the list
+
+  for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
+    var node = topLevelList[listIndex];
+
+    if (node[0] === 'defun') {
+      callback(node);
+    }
+  }
+}
+
+function identifyDuplicateFunctions(ast) { // group top-level function names by the SHA-256 of their name-stripped source
+  debugLog(ZONE_TOP_LEVEL, "identifyDuplicateFunctions");
+
+  var functionHashToFunctionName = {}; // hex digest -> array of the names hashed to it
+
+  traverseFunctions(ast, function(node) {
+    debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Node: " + node);
+    var functionBody = getFunctionBody(node);
+
+    debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Function Body: " + functionBody + "\n");
+    var functionHash = crypto.createHash('sha256').update(functionBody).digest('hex');
+
+    if (functionHashToFunctionName[functionHash] === undefined) {
+      functionHashToFunctionName[functionHash] = []; // first function seen with this digest
+    }
+
+    debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, typeof node[1]);
+    functionHashToFunctionName[functionHash].push(node[1]); // node[1] is the function's name
+    debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, functionHash + '->' + node[1]);
+  });
+
+  if (debug) {
+    for (var key in functionHashToFunctionName) {
+      debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, key + "->" + functionHashToFunctionName[key]);
+    }
+  }
+
+  return functionHashToFunctionName;
+}
+
+function getVariableNames(ast) { // collect every identifier declared by a 'var' node anywhere in ast
+  var variableNames = {}; // identifier -> 1, used as a set
+  traverse(ast, function(node, type) {
+    if (type === 'var') {
+      // node[1] holds the declarations; element 0 of each one is the identifier
+      var vars = node[1];
+
+      if (Array.isArray(vars)) {
+        for (var i = 0; i < vars.length; i++) {
+          var ident = vars[i][0];
+
+          variableNames[ident] = 1;
+        }
+      }
+    }
+  });
+
+  return variableNames;
+}
+
+function replaceFunctionDefinitions(ast, equivalentFunctionHash) { // remove the definition of every function that is a key of the hash
+  debugLog(ZONE_TOP_LEVEL, 'replaceFunctionDefinitions');
+
+  var topLevelList = useAsmAst ? ast : ast[1]; // with --use-asm-ast, ast itself is used as the list
+  var indicesToRemove = [];
+  for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
+    var node = topLevelList[listIndex];
+
+    if (node[0] === 'defun' && equivalentFunctionHash[node[1]] !== undefined) {
+      indicesToRemove.push(listIndex);
+    }
+  }
+
+  if (indicesToRemove.length > 0) {
+    for (var i = indicesToRemove.length - 1; i >= 0; --i) { // back to front, so the remaining indices stay valid while splicing
+      debugLog(ZONE_REPLACE_DUPLICATE_FUNCS, "Removing " + topLevelList[indicesToRemove[i]][1]);
+      topLevelList.splice(indicesToRemove[i], 1);
+    }
+  }
+}
+
+// Rewrites every reference to a duplicate function so it names the surviving equivalent.
+//   @arg ast: The AST to update in place.
+//   @arg equivalentFunctionHash: Maps the name of each function that is going away to the
+//     name of the equivalent function that replaces it.
+// References are looked for in call targets, var initializers, the right-hand side of
+// assignments, object literal values and array literal elements.
+function replaceFunctionReferences(ast, equivalentFunctionHash) {
+  debugLog(ZONE_TOP_LEVEL, 'replaceFunctionReferences');
+
+  // Renames a ['name', ident] node in place when ident is a key of the hash. Anything else
+  // is left untouched: a missing node (e.g. the absent initializer of "var x;" or of
+  // "for (var k in o)"), and nodes of other kinds such as ['string', ...] literals that
+  // merely spell a function name. The own-property test stops inherited keys like
+  // 'constructor' from being treated as hash entries.
+  function renameIfDuplicate(candidate, what) {
+    if (!Array.isArray(candidate) || candidate[0] !== 'name') return;
+    var ident = candidate[1];
+    if (!Object.prototype.hasOwnProperty.call(equivalentFunctionHash, ident)) return;
+
+    debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, what + ' replacement: ' + ident + '->' + equivalentFunctionHash[ident]);
+    candidate[1] = equivalentFunctionHash[ident];
+  }
+
+  // Applies renameIfDuplicate to element `slot` of every entry (or to the entry itself).
+  function renameEach(list, slot, what) {
+    for (var i = 0; i < list.length; i++) {
+      renameIfDuplicate(slot === undefined ? list[i] : list[i][slot], what);
+    }
+  }
+
+  traverse(ast, function(node, type) {
+    if (type === 'call') {
+      // Replace the call with a call to the equivalent function if there is one
+      renameIfDuplicate(node[1], 'Call target');
+    } else if (type === 'var') {
+      // Each entry is [ident] or [ident, initializer]
+      renameEach(node[1], 1, 'Variable value');
+    } else if (type === 'assign') {
+      // node[3] is the value being assigned
+      renameIfDuplicate(node[3], 'Assigned value');
+    } else if (type === 'object') {
+      // Each entry starts with [key, value]
+      renameEach(node[1], 1, 'Object value');
+    } else if (type === 'array') {
+      var arrayVars = node[1];
+
+      if (Array.isArray(arrayVars)) {
+        renameEach(arrayVars, undefined, 'Array value');
+      } else {
+        debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "ArrayVars (not an array): " + arrayVars + ", node: " + node);
+      }
+    }
+  });
+}
+
+function replaceDuplicateFuncs(ast, equivalentFunctionHash) { // redirect references first, then delete the duplicate definitions
+  debugLog(ZONE_TOP_LEVEL, "replaceDuplicateFuncs");
+
+  // Replace references to all functions with their equivalent function
+  replaceFunctionReferences(ast, equivalentFunctionHash);
+
+  // Now lets replace the function definitions
+  replaceFunctionDefinitions(ast, equivalentFunctionHash);
+}
+
+function logEquivalentFunctionHash(equivalentFunctionHash) { // debug-only dump of every "duplicate->replacement" pair
+  if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) {
+    debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Function Hash:");
+    for (var fn in equivalentFunctionHash) {
+      debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, fn + "->" + equivalentFunctionHash[fn]);
+    }
+  }
+}
+
+function generateEquivalentFunctionHash(functionHashToFunctionName, variableNames) { // build the duplicate-name -> surviving-name map
+  var equivalentFunctionHash = {};
+
+  debugLog(ZONE_TOP_LEVEL, "generateEquivalentFunctionHash");
+
+  if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) {
+    debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Functions:");
+
+    for (var fnHash in functionHashToFunctionName) {
+      if (functionHashToFunctionName[fnHash].length > 1) { // only groups that actually contain duplicates
+        debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, JSON.stringify(functionHashToFunctionName[fnHash], null, '  '));
+      }
+    }
+  }
+
+  for (var fnHash in functionHashToFunctionName) {
+    var equivalentFunctions = functionHashToFunctionName[fnHash];
+    var shortestFunction = undefined; // the survivor for this group, if any name qualifies
+    var equivalentFn = undefined;
+
+    // From each list of equivalent functions, pick the
+    // shortest one that is not also a variable name
+    for (var index in equivalentFunctions) {
+      equivalentFn = equivalentFunctions[index];
+
+      // Take this name if it does not clash with a variable name and it is
+      // strictly shorter than the best candidate so far (ties keep the earlier one).
+      if ((variableNames[equivalentFn] === undefined) &&
+        (shortestFunction === undefined || equivalentFn.length < shortestFunction.length)) {
+        shortestFunction = equivalentFn;
+      }
+
+      if (debug && variableNames[equivalentFn] !== undefined) {
+        debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + " is a variable");
+      }
+    }
+
+    if (shortestFunction !== undefined) {
+      // Populate the equivalent function hash with this info
+      for (var index in equivalentFunctions) {
+        equivalentFn = equivalentFunctions[index];
+
+        // If we're not the shortest function, and
+        // we are not a variable name
+        if ((equivalentFn !== shortestFunction) && variableNames[equivalentFn] === undefined) {
+          equivalentFunctionHash[equivalentFn] = shortestFunction;
+          debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + "->" + shortestFunction);
+        }
+      }
+    }
+  }
+
+  return equivalentFunctionHash;
+}
+
+// Returns the name-stripped source of the top-level function called functionName, or
+// undefined when there is none. ast may be a ['toplevel', statements] node or a bare
+// statement list (what findAsmAst yields for --use-asm-ast); indexing a bare list with
+// [1] would scan the fields of one statement instead of the statements themselves.
+function getBodyForFunction(ast, functionName) {
+  var topLevelList = ast[0] === 'toplevel' ? ast[1] : ast;
+
+  for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) {
+    var node = topLevelList[listIndex];
+    if (node[0] === 'defun' && node[1] === functionName) {
+      return getFunctionBody(node);
+    }
+  }
+  return undefined;
+}
+
+function checkForHashCollisions(ast, functionHashToFunctionName) { // verify same-hash functions have identical bodies; returns hash -> body
+  var functionHashToFunctionBody = {};
+
+  for (var functionHash in functionHashToFunctionName) {
+    var equivalentFunctions = functionHashToFunctionName[functionHash];
+    var functionBody = getBodyForFunction(ast, equivalentFunctions[0]); // the first name's body is the reference
+
+    functionHashToFunctionBody[functionHash] = functionBody;
+
+    // If we have more than one equivalent function, make sure
+    // that the bodies are the same from the hash values
+    if (equivalentFunctions.length > 1) {
+      for (var functionIndex = 1; functionIndex < equivalentFunctions.length; ++functionIndex) {
+        var curFunctionBody = getBodyForFunction(ast, equivalentFunctions[functionIndex]);
+
+        if (curFunctionBody !== functionBody) {
+          printErr("ERROR!!! Function bodies for two hash-equivalent functions differ!!! Candidates: "
+                  + equivalentFunctions[0] + ", " + equivalentFunctions[functionIndex]);
+          process.exit(1); // a mismatch aborts the whole run with exit status 1
+        }
+      }
+    }
+  }
+
+  return functionHashToFunctionBody;
+}
+
+function eliminateDuplicateFuncs(ast) { // one complete elimination pass over ast, modifying it in place
+  debugLog(ZONE_TOP_LEVEL, "eliminateDuplicateFuncs");
+
+  // Phase 1 - identify duplicate functions
+  var functionHashToFunctionName = identifyDuplicateFunctions(ast);
+
+  // Phase 1.1 - Check for hash collisions
+  checkForHashCollisions(ast, functionHashToFunctionName); // return value unused here; it exits the process on a mismatch
+
+  // Phase 2 - identify variables that conflict with function names
+  var variableNames = getVariableNames(ast);
+
+  // Phase 3 - generate the equivalent function hash
+  var equivalentFunctionHash = generateEquivalentFunctionHash(functionHashToFunctionName, variableNames);
+
+  // Phase 4 - for each set of equivalent functions, pick one and
+  // use it to replace the other equivalent functions.
+  replaceDuplicateFuncs(ast, equivalentFunctionHash);
+
+  return;
+}
+
+function find(filename) { // resolve filename against ../src (relative to this script) and then the current directory
+  var prefixes = [nodePath.join(__dirname, '..', 'src'), process.cwd()];
+  for (var i = 0; i < prefixes.length; ++i) {
+    var combined = nodePath.join(prefixes[i], filename);
+    if (nodeFS.existsSync(combined)) {
+      return combined; // the first prefix under which the file exists wins
+    }
+  }
+  return filename; // not found under any prefix: hand the name back unchanged
+}
+
+function findAsmAst(ast) { // locate the node reached through the initializer of a variable named 'asm'
+  var asmNode = undefined; // stays undefined when no such variable is found
+  traverse(ast, function(node, type) {
+    if (type === 'var') {
+      // The traversal is not stopped on a match, so the last matching declaration wins
+      var vars = node[1];
+      for (var i = 0; i < vars.length; i++) {
+        var ident = vars[i][0];
+
+        if (ident === 'asm') {
+          asmNode = vars[i][1][1][3]; // asm->call->toplevel-ast
+        }
+      }
+    }
+  });
+
+  return asmNode;
+}
+
+function printHashInfo(ast) { // print, as one JSON object on stdout, the data gathered for --gen-hash-info
+  debugLog(ZONE_TOP_LEVEL, "printHashInfo");
+
+  var infoHash = {};
+  infoHash['variable_names'] = getVariableNames(ast);
+  infoHash['fn_hash_to_fn_name'] = identifyDuplicateFunctions(ast);
+  infoHash['fn_hash_to_fn_body'] = checkForHashCollisions(ast, infoHash['fn_hash_to_fn_name']); // exits the process on a mismatch
+
+  print(JSON.stringify(infoHash));
+}
+
+read = function(filename) { // return the file's contents as a string; NOTE(review): assigned without var, so this is an implicit global
+  var absolute = find(filename); // resolved through find()'s search prefixes
+  return nodeFS['readFileSync'](absolute).toString();
+};
+
+// Main
+// Arguments: one input file, plus any of --debug, --no-minimize-whitespace, --gen-hash-info,
+// --use-hash-info, --use-asm-ast and --get-function-body <name>.
+var arguments_ = process['argv'].slice(2);
+var noMinimizeWhitespace = false; // Eliminate whitespace by default
+var functionName = undefined;
+var src = undefined;
+
+for (var argIndex = 0; argIndex < arguments_.length; ++argIndex) {
+  var arg = arguments_[argIndex];
+  if (arg === '--debug') {
+    debug = true;
+    debugFile = nodeFS.openSync(debugFileName, 'w');
+  } else if (arg === '--no-minimize-whitespace') {
+    noMinimizeWhitespace = true;
+  } else if (arg === '--gen-hash-info') {
+    genHashInfo = true;
+  } else if (arg === '--use-hash-info') {
+    useHashInfo = true;
+  } else if (arg === '--use-asm-ast') {
+    useAsmAst = true;
+  } else if (arg === '--get-function-body') {
+    // The function name must follow the flag
+    if (argIndex === arguments_.length - 1) {
+      throw new Error('Please specify valid arguments!');
+    }
+    functionName = arguments_[argIndex+1];
+    argIndex += 1;
+  } else if (/^--/.test(arg)) {
+    throw new Error('Please specify valid arguments!');
+  } else if (src === undefined) {
+    src = read(arg);
+  } else {
+    throw new Error('Please specify valid arguments!');
+  }
+}
+
+if (src === undefined) {
+  // Without an input file there is nothing to parse
+  throw new Error('Please specify valid arguments!');
+}
+
+var ast = srcToAst(src);
+var asmAst = ast;
+
+if (useAsmAst) {
+  asmAst = findAsmAst(ast);
+}
+
+if (debug) {
+  dumpAst(ast);
+}
+
+if (functionName !== undefined) {
+  var functionBody = getBodyForFunction(ast, functionName);
+
+  if (functionBody === undefined) {
+    throw new Error('Could not find body for function ' + functionName + '!!!');
+  }
+
+  print(functionBody);
+} else if (genHashInfo) {
+  printHashInfo(asmAst);
+} else {
+  var equivalentFunctionHash = {};
+
+  if (useHashInfo) {
+    // The last line has the required info: the JSON follows the final "//"
+    var infoHashCommentStart = src.lastIndexOf("//");
+    if (infoHashCommentStart === -1) {
+      throw new Error('--use-hash-info specified but no JSON found at the end of the file!');
+    }
+    equivalentFunctionHash = JSON.parse(src.substring(infoHashCommentStart + 2));
+    logEquivalentFunctionHash(equivalentFunctionHash);
+    replaceDuplicateFuncs(asmAst, equivalentFunctionHash);
+  } else {
+    eliminateDuplicateFuncs(asmAst);
+  }
+  print(astToSrc(ast, !(debug || noMinimizeWhitespace)));
+}
+
+if (debug && debugFile !== undefined) {
+  printErr('Wrote debug log to ' + debugFileName);
+  nodeFS.closeSync(debugFile);
+}
\ No newline at end of file
diff --git a/tools/file_packager.py b/tools/file_packager.py
index 30e085878d6ff..832ceef59b581 100644
--- a/tools/file_packager.py
+++ b/tools/file_packager.py
@@ -209,7 +209,7 @@ def has_hidden_attribute(filepath):
     attrs = ctypes.windll.kernel32.GetFileAttributesW(unicode(filepath))
     assert attrs != -1
     result = bool(attrs & 2)
-  except (AttributeError, AssertionError):
+  except:
     result = False
   return result
 
diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py
index e2b3b78fe274f..65a2153879d12 100644
--- a/tools/gen_struct_info.py
+++ b/tools/gen_struct_info.py
@@ -312,10 +312,10 @@ def gen_inspect_code(path, struct, code):
   c_descent(path[-1], code)
   
   if len(path) == 1:
-    c_set('__size__', 'i%u', 'sizeof (' + prefix + path[0] + ')', code)
+    c_set('__size__', 'i%zu', 'sizeof (' + prefix + path[0] + ')', code)
   else:
-    c_set('__size__', 'i%u', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code)
-    #c_set('__offset__', 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code)
+    c_set('__size__', 'i%zu', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code)
+    #c_set('__offset__', 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code)
   
   for field in struct:
     if isinstance(field, dict):
@@ -323,7 +323,7 @@ def gen_inspect_code(path, struct, code):
       fname = field.keys()[0]
       gen_inspect_code(path + [fname], field[fname], code)
     else:
-      c_set(field, 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code)
+      c_set(field, 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code)
   
   c_ascent(code)
 
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py
index 45b4836270d71..be42d06bb5caf 100644
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -422,9 +422,6 @@ def write_chunk(chunk, i):
   else:
     filenames = []
 
-  if shared.Settings.WASM:
-    passes = filter(lambda p: p != 'minifyWhitespace', passes) # if we are going to wasmify the asm module, no need to minify it before hand
-
   if len(filenames) > 0:
     if not use_native(passes, source_map) or not get_native_optimizer():
       commands = map(lambda filename: js_engine +
diff --git a/tools/shared.py b/tools/shared.py
index 06887033233a6..933fe44968016 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1654,6 +1654,11 @@ def js_optimizer(filename, passes, debug=False, extra_info=None, output_filename
       ret = output_filename
     return ret
 
+  @staticmethod
+  def eliminate_duplicate_funcs(filename):
+    import duplicate_function_eliminator
+    duplicate_function_eliminator.eliminate_duplicate_funcs(filename)
+
   @staticmethod
   def closure_compiler(filename, pretty=True):
     if not check_closure_compiler():